| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 | 
							- import suggests
 
- import networkx as nx
 
- import pyvis
 
- import time
 
- from pyvis.network import Network
 
- import pickle
 
- import dataset
 
- import codecs
 
def hierarchy_pos(G, root, levels=None, width=1., height=1.):
    """Compute a top-down layered layout for the nodes reachable from ``root``.

    Nodes are discovered depth-first from ``root``.  A node's depth selects
    its row, and the nodes of one row are spread evenly across ``width`` in
    discovery order.  Every node is placed exactly once, so a graph with
    cycles or self-loops is laid out along its depth-first tree instead of
    recursing forever, and the walk is iterative so very deep trees do not
    hit the interpreter's recursion limit.

    Args:
        G: the graph; only ``G.neighbors(node)`` is used, and it must return
            an iterable of adjacent nodes.
        root: the root node; it is placed in the top row (y == 0).
        levels: optional dictionary
            key: level number (starting from 0)
            value: number of nodes in this level
            When omitted, the counts are derived from the traversal.
        width: horizontal space allocated for drawing.
        height: vertical space allocated for drawing.

    Returns:
        dict mapping each reachable node to an ``(x, y)`` tuple.  Rows are
        ``height / (max level + 1)`` apart and grow towards negative y.

    Raises:
        KeyError: if ``levels`` is supplied but has no entry for a depth the
            traversal reaches.
        ValueError: if ``levels`` is supplied but empty.
    """
    def preorder():
        """Yield ``(node, depth)`` in depth-first pre-order, each node once."""
        seen = {root}
        yield root, 0
        # One neighbour iterator per node on the current root-to-node path;
        # this reproduces the recursive visiting order without recursion.
        stack = [iter(G.neighbors(root))]
        while stack:
            for neighbor in stack[-1]:
                if neighbor not in seen:
                    seen.add(neighbor)
                    # len(stack) == depth of the parent + 1.
                    yield neighbor, len(stack)
                    stack.append(iter(G.neighbors(neighbor)))
                    break
            else:
                # Neighbours exhausted: step back up to the parent.
                stack.pop()

    visit_order = list(preorder())

    if levels is None:
        # Count how many nodes the traversal found on each level.
        totals = {}
        for _, depth in visit_order:
            totals[depth] = totals.get(depth, 0) + 1
    else:
        # Copy so the caller's dictionary is never touched.
        totals = dict(levels)

    vert_gap = height / (max(totals) + 1)

    pos = {}
    placed = {}        # level -> number of nodes already positioned on it
    level_y = {0: 0}   # level -> y coordinate, built by repeated subtraction
    for node, depth in visit_order:
        if depth not in level_y:
            # Pre-order guarantees the level above has already been seen.
            level_y[depth] = level_y[depth - 1] - vert_gap
        dx = 1 / totals[depth]
        index = placed.get(depth, 0)
        # Centre the node inside its slot: half a slot plus ``index`` slots.
        pos[node] = ((dx / 2 + dx * index) * width, level_y[depth])
        placed[depth] = index + 1
    return pos
 
# ---------------------------------------------------------------------------
# Keyword-graph script.
#
# Reads a tab-separated UTF-16 export named <prefix><filename>.csv, links each
# word to the queries that contain it, reduces the graph to a minimum spanning
# tree and writes an interactive pyvis page to <prefix><filename>.html.
# ---------------------------------------------------------------------------

# Folder holding the input .csv; the rendered .html is written next to it.
prefix="C:\\Users\\jared\\Downloads\\"
#prefix='C:\\gitlab\\AI_Video_LP\\webSite\\static\\choozmo-seo\\'

# Base name (no extension) of the export to process.  The commented names are
# alternatives kept for switching data sets by hand.
#filename='videomarketing'
#filename='marketing_strategy'
#filename='taiwan'
#filename='japan_taoyuan'
filename='ARVR_raw'
#filename='ARVR_google'
#filename='vtuber'
#filename='xsens'
#filename='bike2'
#filename='webinar'
#filename='bike3'
#filename='copywriting'
#filename='openai'
#filename='seo'
#filename='contentmarketing'
#filename='adsense'
#filename='influencer'
#filename='youtuber'
#filename='martech'
#filename='twitter'
#filename='searchengine'
#filename='nlp'
#filename='startup'
#filename='nft'

# One {'q': query text, 'cnt': integer count} dict per data row.
table=[]

#fr=codecs.open('C:/Users/jared/Downloads/davi.csv','r','utf-16')
#fr=codecs.open('C:/Users/jared/Downloads/c100.csv','r','utf-16')
#fr=codecs.open('C:/Users/jared/Downloads/gap.csv','r','utf-8')
#fr=codecs.open('C:/Users/jared/Downloads/'+filename+'.csv','r','utf-16')
#fr=codecs.open('C:/Users/jared/Downloads/ARVR_raw.csv','r','utf-16')
# The context manager closes the file even if reading or decoding fails.
with codecs.open(prefix+filename+".csv",'r','utf-16') as fr:
    lines=fr.readlines()

# The first three lines are skipped (presumably header/metadata rows of the
# export -- confirm against a sample file).
for line in lines[3:]:
    # Drop the line terminator so it cannot end up inside the last column.
    elmts=line.rstrip('\r\n').split('\t')
    if not elmts[0]:
        # Blank line or empty first column: there is no query to record.
        continue
#    print(elmts)
    print(elmts[0])
#    print(elmts[2])
    try:
#        cnt=int(elmts[2])
        cnt=int(elmts[1])
    except (IndexError, ValueError):
        # Missing or non-numeric second column: fall back to a count of 0.
        cnt=0
    table.append({'q':elmts[0],'cnt':cnt})

G = nx.Graph()
# word -> list of every query containing that word (queries split on ' ').
elmt_dict={}
# Last word that contributed edges; only the commented-out hierarchy_pos call
# below reads it.
root=None

for c in table:
    q=c['q']
    for e in q.split(' '):
        elmt_dict.setdefault(e,[]).append(q)

print(elmt_dict)

# Number of words that passed the frequency filter below.
idx=0
for k,v in elmt_dict.items():
    # Keep only words occurring in 3..27 queries.
    # NOTE(review): the bounds look hand-tuned to drop rare and overly common
    # words -- confirm before changing them.
    if 2 < len(v) < 28:
#    if len(v)>2 and len(v)<28:
#    if len(v)>2:
        for e in v:
            G.add_edge(k,e,weight=1)
        root=k
        idx+=1

# A single-word query equals its own word and would link to itself.
# list() snapshots the self-loops because the graph is mutated while removing.
G.remove_edges_from( list(nx.selfloop_edges(G)))

# ``weight`` must name the edge attribute holding the weight; the previous
# value 5 matched no attribute, so networkx fell back to a default weight of 1.
# Every edge here has weight 1, so the resulting tree is unchanged.
G2=nx.minimum_spanning_tree(G,weight='weight')
#G3=hierarchy_pos(G2,root)

pyG = Network(height="750px", width="100%",bgcolor="#333333",font_color="white")
#pyG.set_options()
pyG.from_nx(G2)
#pyG.enable_physics(False)
#pyG.barnes_hut()
#pyG.show('gs.html')
pyG.show(prefix+filename+'.html')

print(idx)
 
 
  |