123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
- import searchconsole
- import dataset
- import os
- import networkx as nx
- from pyvis.network import Network
- import sqlite3
- import csv
- import sys
- import codecs
- import difflib
# Token -> occurrence count; proc_row() increments these counters.
kwdict = dict()
# Undirected graph that receives the cluster edges and the key-to-key edges.
G = nx.Graph()
def gcm0(strings):
    """Greedily group similar strings with difflib.

    Each input string is stripped of surrounding whitespace and compared
    against the keys of the clusters built so far (at most 8 candidates,
    similarity cutoff 0.65). It is appended to the cluster of the best
    match, or becomes the key of a new cluster when nothing matches.
    The result therefore depends on the order of the input.

    Args:
        strings: iterable of strings to cluster.

    Returns:
        dict mapping each cluster key (the first string that opened the
        cluster) to the list of its members, the key itself included.
    """
    clusters = {}
    for raw in strings:
        candidate = raw.strip()
        nearest = difflib.get_close_matches(candidate, clusters.keys(), 8, 0.65)
        if not nearest:
            # No existing cluster is close enough: open a new one.
            clusters[candidate] = [candidate]
            continue
        # get_close_matches returns its results most-similar first.
        clusters[nearest[0]].append(candidate)
    return clusters
def proc_row(row):
    """Count the tokens of one row in the module-level ``kwdict``.

    The row is split on single spaces, so consecutive spaces produce
    empty-string tokens, which are counted like any other token.

    Args:
        row: the text to tokenize.

    Returns:
        None. The counts are accumulated in ``kwdict``.
    """
    for token in row.split(' '):
        kwdict[token] = kwdict.get(token, 0) + 1
# --- Load the input -------------------------------------------------------
# The input is a tab-separated UTF-16 file with one header row; only the
# first column of each data row is used. newline='' is what the csv module
# expects, so rows are split on real line endings only.
with open('C:\\tmp\\test9.csv', 'r', encoding='utf-16', newline='') as csvfile:
    spamreader = csv.reader(csvfile, delimiter='\t', quotechar='|')
    # Discard the header row; the default keeps an empty file from raising.
    next(spamreader, None)
    # Start from a fresh counter. proc_row() looks this module-level name up
    # at call time, so rebinding it here is what it will update.
    kwdict = {}
    rowlst = []
    for row in spamreader:
        # csv.reader yields [] for blank lines; row[0] would raise IndexError.
        if not row:
            continue
        proc_row(row[0])
        rowlst.append(row[0])

# --- Build the graph ------------------------------------------------------
# Group similar phrases, then connect every member to its cluster key.
clusters = gcm0(rowlst)
keys = list(clusters)
for k, v in clusters.items():
    for x in v:
        # The key is a member of its own cluster, so this also creates a
        # self-loop; those are removed before rendering.
        G.add_edge(k, x, weight=1, label='')

# Link cluster keys whose 2-gram distance is at most 12. A key takes part in
# at most one such link: once paired it is recorded in already_dict and
# ignored for all later candidates.
already_dict = {}
# NOTE: imported here because the file's import section does not list strsimpy.
from strsimpy.qgram import QGram
qgram = QGram(2)
for k1 in keys:
    for k2 in keys:
        if k1 in already_dict:
            # k1 is paired; no remaining k2 can be linked to it.
            break
        if k1 == k2 or k2 in already_dict:
            continue
        # Checked last so the distance is only computed for unpaired keys.
        if qgram.distance(k1, k2) <= 12:
            already_dict[k1] = 1
            already_dict[k2] = 1
            G.add_edge(k1, k2, weight=1, label='')

# --- Render ---------------------------------------------------------------
pyG = Network(height="900px", width="100%", bgcolor="#444444", font_color="white")
pyG.set_options("""
const options = {
  "nodes" : {
    "font" : {
      "size" : "30",
      "color" : "#ffffff"
    }
  },
  "physics": {
    "forceAtlas2Based": {
      "springLength": 100
    },
    "maxVelocity": 150,
    "minVelocity": 0.28,
    "solver": "forceAtlas2Based"
  }
}
""")
# Drop the key-to-itself edges created while adding cluster members.
G.remove_edges_from(nx.selfloop_edges(G))
pyG.from_nx(G)
# To expose the physics controls in the page, enable:
#   pyG.show_buttons(filter_=['physics'])
pyG.show('news17.html')
sys.exit()
|