csv_gap_tree.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. import suggests
  2. import networkx as nx
  3. import pyvis
  4. import time
  5. from pyvis.network import Network
  6. import pickle
  7. import dataset
  8. import codecs
  9. def hierarchy_pos(G, root, levels=None, width=1., height=1.):
  10. '''If there is a cycle that is reachable from root, then this will see infinite recursion.
  11. G: the graph
  12. root: the root node
  13. levels: a dictionary
  14. key: level number (starting from 0)
  15. value: number of nodes in this level
  16. width: horizontal space allocated for drawing
  17. height: vertical space allocated for drawing'''
  18. TOTAL = "total"
  19. CURRENT = "current"
  20. def make_levels(levels, node=root, currentLevel=0, parent=None):
  21. """Compute the number of nodes for each level
  22. """
  23. if not currentLevel in levels:
  24. levels[currentLevel] = {TOTAL : 0, CURRENT : 0}
  25. levels[currentLevel][TOTAL] += 1
  26. neighbors = G.neighbors(node)
  27. for neighbor in neighbors:
  28. if not neighbor == parent:
  29. levels = make_levels(levels, neighbor, currentLevel + 1, node)
  30. return levels
  31. def make_pos(pos, node=root, currentLevel=0, parent=None, vert_loc=0):
  32. dx = 1/levels[currentLevel][TOTAL]
  33. left = dx/2
  34. pos[node] = ((left + dx*levels[currentLevel][CURRENT])*width, vert_loc)
  35. levels[currentLevel][CURRENT] += 1
  36. neighbors = G.neighbors(node)
  37. for neighbor in neighbors:
  38. if not neighbor == parent:
  39. pos = make_pos(pos, neighbor, currentLevel + 1, node, vert_loc-vert_gap)
  40. return pos
  41. if levels is None:
  42. levels = make_levels({})
  43. else:
  44. levels = {l:{TOTAL: levels[l], CURRENT:0} for l in levels}
  45. vert_gap = height / (max([l for l in levels])+1)
  46. return make_pos({})
  47. prefix='C:\\gitlab\\AI_Video_LP\\webSite\\static\\choozmo-seo\\'
  48. #filename='videomarketing'
  49. #filename='marketing_strategy'
  50. #filename='taiwan'
  51. filename='japan_taoyuan'
  52. table=[]
  53. #fr=codecs.open('C:/Users/jared/Downloads/davi.csv','r','utf-16')
  54. #fr=codecs.open('C:/Users/jared/Downloads/c100.csv','r','utf-16')
  55. #fr=codecs.open('C:/Users/jared/Downloads/gap.csv','r','utf-8')
  56. fr=codecs.open('C:/Users/jared/Downloads/'+filename+'.csv','r','utf-16')
  57. lines=fr.readlines()
  58. fr.close()
  59. for l in lines[3:]:
  60. elmts=l.split('\t')
  61. # print(elmts)
  62. print(elmts[0])
  63. # print(elmts[2])
  64. cnt=0
  65. try:
  66. # cnt=int(elmts[2])
  67. cnt=int(elmts[1])
  68. except:
  69. cnt=0
  70. table.append({'q':elmts[0],'cnt':cnt})
  71. G = nx.Graph()
  72. elmt_dict={}
  73. root=None
  74. for c in table:
  75. q=c['q']
  76. elmts=q.split(' ')
  77. for e in elmts:
  78. if elmt_dict.get(e) is None:
  79. elmt_dict[e]=[q]
  80. else:
  81. elmt_dict[e].append(q)
  82. print(elmt_dict)
  83. idx=0
  84. for k,v in elmt_dict.items():
  85. if len(v)>2 :
  86. # if len(v)>2 and len(v)<28:
  87. # if len(v)>2:
  88. for e in v:
  89. G.add_edge(k,e,weight=1)
  90. root=k
  91. idx+=1
  92. G.remove_edges_from( list(nx.selfloop_edges(G)))
  93. G2=nx.minimum_spanning_tree(G,weight=5)
  94. #G3=hierarchy_pos(G2,root)
  95. pyG = Network(height="750px", width="100%",bgcolor="#333333",font_color="white")
  96. #pyG.set_options()
  97. pyG.from_nx(G2)
  98. #pyG.enable_physics(False)
  99. #pyG.barnes_hut()
  100. #pyG.show('gs.html')
  101. pyG.show(prefix+filename+'.html')
  102. print(idx)