Jared 2 år sedan
förälder
incheckning
d38947b381
1 ändrade filer med 101 tillägg och 0 borttagningar
  1. 101 0
      choozmo/kw_plan_tree.py

+ 101 - 0
choozmo/kw_plan_tree.py

@@ -0,0 +1,101 @@
+import searchconsole
+import dataset
+import os
+import networkx as nx
+from pyvis.network import Network
+import sqlite3
+import csv
+import sys
+import codecs
+import difflib
+
+
+
+kwdict={}
+G = nx.Graph()
+
+
+
+def gcm0(strings):
+    clusters = {}
+    for string in (x.strip() for x in strings):
+        match = difflib.get_close_matches(string, clusters.keys(), 8, 0.65)
+        if match:
+            clusters[match[0]].append(string)
+        else:
+            clusters[string] = [ string ]
+    return clusters
+
+
+def proc_row(row):
+    elmts=row.split(' ')
+    for elmt in elmts:
+        if kwdict.get(elmt) is None:
+            kwdict[elmt]=1
+        else:
+            kwdict[elmt]+=1
+
+with codecs.open('C:\\tmp\\test.csv', 'r','utf-16') as csvfile:
+    spamreader = csv.reader(csvfile, delimiter='\t', quotechar='|')
+    kwdict={}
+    addict={}
+    head=True
+    rowlst=[]
+    for row in spamreader:
+        if head:
+            head=False
+            continue
+        ll=len(row)
+        proc_row(row[0])
+        rowlst.append(row[0])
+
+
+clusters=gcm0(rowlst)
+keys=[]
+for k,v in clusters.items():
+    keys.append(k)
+    for x in v:
+        G.add_edge(k,x,weight=1,label='')
+
+
+already_dict={}
+from strsimpy.qgram import QGram
+qgram = QGram(2)
+for k1 in keys:
+    for k2 in keys:
+        if k1!=k2:
+            if qgram.distance(k1, k2)<=12:
+                if already_dict.get(k1) is None and already_dict.get(k2) is None:
+                    already_dict[k1]=1
+                    already_dict[k2]=1
+                    G.add_edge(k1,k2,weight=1,label='')
+
+
+pyG = Network(height="600px", width="100%", bgcolor="#444444", font_color="white")
+pyG.set_options("""
+const options = {
+"nodes" : {
+        "font" : {
+            "size" : "30",
+            "color" : "#ffffff"
+        }
+    },
+  "physics": {
+    "forceAtlas2Based": {
+      "springLength": 100
+    },
+    "maxVelocity": 150,
+    "minVelocity": 0.28,
+    "solver": "forceAtlas2Based"
+  }
+}
+""")
+G.remove_edges_from(nx.selfloop_edges(G))
+pyG.from_nx(G)
+
+#pyG.show_buttons(filter_=['physics'])
+pyG.show('news17.html')
+
+#print(clusters)
+
+sys.exit()