gsc_tree.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. import searchconsole
  2. import dataset
  3. import os
  4. import networkx as nx
  5. from pyvis.network import Network
  6. import sqlite3
  7. db = dataset.connect('sqlite:///:memory:')
  8. table = db['tmp']
  9. # table=db['gsc_page_query_year']
  10. # pname='korea'
  11. rid = 0
  12. def checkig():
  13. global instl
  14. global table
  15. global pname
  16. global rid
  17. lst = []
  18. cntdict = {}
  19. codelist = {}
  20. idx = 0
  21. flag_break = False
  22. fname = os.path.abspath(__file__)
  23. elmts = fname.split(os.path.sep)
  24. path2 = os.path.sep.join(elmts[0:-1])
  25. keysdir = path2 + os.path.sep + '../keys' + os.path.sep
  26. account = searchconsole.authenticate(client_config='c:/keys/client_secret.json',
  27. credentials='c:/keys/credentials.json')
  28. # webproperty = account['sc-domain:www.yogoclean.com']
  29. webproperty = account['https://www.yogoclean.com/']
  30. print(webproperty)
  31. report = webproperty.query.range('today', months=-6).dimension('page', 'query').get()
  32. print(report)
  33. # report = webproperty.query.range('2022-05-28', '2022-05-29').dimension('page', 'query').get()
  34. # account = searchconsole.authenticate(client_config='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\client_secret.json',credentials='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\credentials.json')
  35. G = nx.Graph()
  36. result = []
  37. rdict = {}
  38. total_idx = 0
  39. for r in report.rows:
  40. print(r)
  41. print(rdict)
  42. if rdict.get(r[0]) is None:
  43. total_idx += 1
  44. rid = total_idx
  45. rdict[r[0]] = rid
  46. else:
  47. rid = rdict[r[0]]
  48. entry = {'page': r[0], 'query': r[1], 'rid': rid}
  49. result.append(entry)
  50. print('list done')
  51. for r in result:
  52. table.insert(r)
  53. db.commit()
  54. print('db done')
  55. cursor = db.query(
  56. 'select query as q,page as url,rid from tmp where rid in (select rid from (select rid,count(*) from tmp group by rid having count(*) > 2 and count(*) < 6) as tbl1) order by rid ')
  57. # select_data = cur.execute('select q,url,rid from tmp order by rid ')
  58. riddict = {}
  59. prev = ''
  60. curnode = ''
  61. cururl = ''
  62. total_idx = 0
  63. for data in cursor:
  64. G.add_edge(data[0], data[2], weight=3, width=3, borderwidth=3)
  65. remove = []
  66. G.remove_edges_from(nx.selfloop_edges(G))
  67. G2 = G
  68. pyG = Network(height="600px", width="100%", bgcolor="#444444", font_color="white")
  69. pyG.from_nx(G2)
  70. pyG.show('news15.html')
  71. r = checkig()