123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- import suggests
- import networkx as nx
- import pyvis
- import time
- from pyvis.network import Network
- import pickle
- import dataset
- import codecs
def hierarchy_pos(G, root, levels=None, width=1., height=1.):
    """Compute a top-down layered layout for the tree reachable from root.

    Every node is placed on a horizontal row given by its depth in a
    depth-first walk starting at ``root``; the nodes of one row are spread
    evenly over ``width`` in the order in which they are visited.

    A node that has already been visited is never entered again, so a cycle
    or self-loop reachable from ``root`` does not cause infinite recursion;
    such a graph is laid out along its depth-first spanning tree. For a
    tree the result is the same as skipping only the parent node.
    The walk itself is recursive, so the supported depth is bounded by the
    interpreter's recursion limit.

    G: the graph (only ``G.neighbors(node)`` is used)
    root: the root node
    levels: a dictionary
        key: level number (starting from 0)
        value: number of nodes in this level
        If None, it is computed by walking the graph from ``root``.
    width: horizontal space allocated for drawing
    height: vertical space allocated for drawing

    Returns a dictionary mapping each visited node to an ``(x, y)`` tuple.
    The root has ``y == 0`` and each deeper level lies
    ``height / (deepest level + 1)`` lower than the previous one.
    """
    TOTAL = "total"
    CURRENT = "current"

    def make_levels(levels, node=root, currentLevel=0, visited=None):
        """Compute the number of nodes for each level."""
        if visited is None:
            visited = set()
        visited.add(node)
        if currentLevel not in levels:
            levels[currentLevel] = {TOTAL: 0, CURRENT: 0}
        levels[currentLevel][TOTAL] += 1
        for neighbor in G.neighbors(node):
            # Skipping visited nodes covers the parent as well as any
            # back edge that would otherwise close a cycle.
            if neighbor not in visited:
                make_levels(levels, neighbor, currentLevel + 1, visited)
        return levels

    def make_pos(pos, node=root, currentLevel=0, vert_loc=0, visited=None):
        """Assign coordinates using the same walk order as make_levels."""
        if visited is None:
            visited = set()
        visited.add(node)
        # Each level is cut into TOTAL equal slots; the node is centred in
        # the next free slot (CURRENT counts the slots already taken).
        dx = 1 / levels[currentLevel][TOTAL]
        left = dx / 2
        pos[node] = ((left + dx * levels[currentLevel][CURRENT]) * width, vert_loc)
        levels[currentLevel][CURRENT] += 1
        for neighbor in G.neighbors(node):
            if neighbor not in visited:
                make_pos(pos, neighbor, currentLevel + 1,
                         vert_loc - vert_gap, visited)
        return pos

    if levels is None:
        levels = make_levels({})
    else:
        # Caller supplied plain counts; wrap them in the bookkeeping form.
        levels = {l: {TOTAL: levels[l], CURRENT: 0} for l in levels}
    vert_gap = height / (max(levels) + 1)
    return make_pos({})
# Script body: read a tab-separated UTF-16 export, link every
# space-separated token to the queries that contain it, and render the
# minimum spanning tree of that graph as an interactive pyvis HTML page.

# Directory that holds the input CSV; the HTML output is written there too.
# A previously used alternative location was
#   'C:\\gitlab\\AI_Video_LP\\webSite\\static\\choozmo-seo\\'
prefix = "C:\\Users\\jared\\Downloads\\"

# Base name (without extension) of the dataset to process.
# Datasets used with this script before: videomarketing, marketing_strategy,
# taiwan, japan_taoyuan, ARVR_google, vtuber, xsens, bike2, webinar, bike3,
# copywriting, openai, seo, contentmarketing, adsense, influencer, youtuber,
# martech, twitter, searchengine, nlp, startup, nft.
filename = 'ARVR_raw'

# One {'q': <first column>, 'cnt': <second column as int>} dict per data row.
table = []

# The context manager closes the file even if reading fails.
# (Earlier inputs davi.csv / c100.csv were UTF-16, gap.csv was UTF-8.)
with codecs.open(prefix + filename + ".csv", 'r', 'utf-16') as fr:
    lines = fr.readlines()

# The first three lines are skipped (presumably export headers -- confirm).
for line in lines[3:]:
    elmts = line.split('\t')
    print(elmts[0])
    try:
        cnt = int(elmts[1])
    except (IndexError, ValueError):
        # Missing or non-numeric second column: best effort, count as 0.
        cnt = 0
    table.append({'q': elmts[0], 'cnt': cnt})

G = nx.Graph()

# token -> list of every query containing that token (split on single spaces)
elmt_dict = {}
root = None
for c in table:
    q = c['q']
    for e in q.split(' '):
        elmt_dict.setdefault(e, []).append(q)
print(elmt_dict)

idx = 0
for k, v in elmt_dict.items():
    # Only tokens shared by 3..27 queries become hubs in the graph.
    if 2 < len(v) < 28:
        for e in v:
            G.add_edge(k, e, weight=1)
            # NOTE(review): the original indentation was lost; root/idx are
            # assumed to be updated once per added edge -- confirm.
            root = k
            idx += 1

# A query equal to its own token would create a self-loop; drop those.
# list() is required because the graph is modified while being inspected.
G.remove_edges_from(list(nx.selfloop_edges(G)))

# `weight` must name the edge attribute; the original passed the integer 5,
# which matches no attribute and silently fell back to the default weight.
G2 = nx.minimum_spanning_tree(G, weight="weight")

# A static layout could be computed with hierarchy_pos(G2, root) instead.
pyG = Network(height="750px", width="100%", bgcolor="#333333", font_color="white")
pyG.from_nx(G2)
pyG.show(prefix + filename + '.html')
print(idx)
|