# gap_tree.py (2.8 KB)
import os
import pickle
import time

import dataset
import networkx as nx
import pyvis
import suggests
from pyvis.network import Network
  8. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
  9. #cursor=db.query('select q,cnt from gap_hhh where cnt > 100')
  10. #cursor=db.query('select q,cnt from gap_searchome where cnt > 50')
  11. cursor=db.query('SELECT q,cnt FROM hhh.gap_searchome where q not in (select q from hhh.gap_hhh) order by cnt desc;')
  12. def hierarchy_pos(G, root, levels=None, width=1., height=1.):
  13. '''If there is a cycle that is reachable from root, then this will see infinite recursion.
  14. G: the graph
  15. root: the root node
  16. levels: a dictionary
  17. key: level number (starting from 0)
  18. value: number of nodes in this level
  19. width: horizontal space allocated for drawing
  20. height: vertical space allocated for drawing'''
  21. TOTAL = "total"
  22. CURRENT = "current"
  23. def make_levels(levels, node=root, currentLevel=0, parent=None):
  24. """Compute the number of nodes for each level
  25. """
  26. if not currentLevel in levels:
  27. levels[currentLevel] = {TOTAL : 0, CURRENT : 0}
  28. levels[currentLevel][TOTAL] += 1
  29. neighbors = G.neighbors(node)
  30. for neighbor in neighbors:
  31. if not neighbor == parent:
  32. levels = make_levels(levels, neighbor, currentLevel + 1, node)
  33. return levels
  34. def make_pos(pos, node=root, currentLevel=0, parent=None, vert_loc=0):
  35. dx = 1/levels[currentLevel][TOTAL]
  36. left = dx/2
  37. pos[node] = ((left + dx*levels[currentLevel][CURRENT])*width, vert_loc)
  38. levels[currentLevel][CURRENT] += 1
  39. neighbors = G.neighbors(node)
  40. for neighbor in neighbors:
  41. if not neighbor == parent:
  42. pos = make_pos(pos, neighbor, currentLevel + 1, node, vert_loc-vert_gap)
  43. return pos
  44. if levels is None:
  45. levels = make_levels({})
  46. else:
  47. levels = {l:{TOTAL: levels[l], CURRENT:0} for l in levels}
  48. vert_gap = height / (max([l for l in levels])+1)
  49. return make_pos({})
  50. G = nx.Graph()
  51. elmt_dict={}
  52. root=None
  53. for c in cursor:
  54. q=c['q']
  55. elmts=q.split(' ')
  56. for e in elmts:
  57. if elmt_dict.get(e) is None:
  58. elmt_dict[e]=[q]
  59. else:
  60. elmt_dict[e].append(q)
  61. print(elmt_dict)
  62. idx=0
  63. for k,v in elmt_dict.items():
  64. if len(v)>2 and len(v)<20:
  65. for e in v:
  66. G.add_edge(k,e,weight=1)
  67. root=k
  68. idx+=1
  69. G.remove_edges_from( list(nx.selfloop_edges(G)))
  70. G2=nx.minimum_spanning_tree(G,weight=5)
  71. #G3=hierarchy_pos(G2,root)
  72. pyG = Network(height="750px", width="100%",bgcolor="#333333",font_color="white")
  73. #pyG.set_options()
  74. pyG.from_nx(G2)
  75. #pyG.enable_physics(False)
  76. #pyG.barnes_hut()
  77. pyG.show('gs.html')
  78. print(idx)