gsc_tree2.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. import searchconsole
  2. import dataset
  3. import os
  4. import networkx as nx
  5. from pyvis.network import Network
  6. import sys
  # Module-level id slot; checkig() declares it `global` and rebinds it while
  # numbering pages.
  rid = 0

  # In-memory SQLite database opened through `dataset`.
  db = dataset.connect('sqlite:///:memory:')

  # Staging table that checkig() inserts its page/query/rid rows into.
  table = db['tmp']
  def checkig(client_config='c:/keys/client_secret4.json',
              credentials='c:/keys/credentials20220524.json',
              site='sc-domain:bennis.com.tw',
              days=-4,
              output='news.html'):
      """Render a page/query graph for one Search Console property.

      Steps, in order:
        1. Authenticate with the `searchconsole` package and print the list
           of sites visible to the account.
        2. Query the (page, query) report for `site` over the range
           ``('today', days=days)``.
        3. Give every distinct page a sequential integer id (1, 2, ...) and
           stage ``{'page', 'query', 'rid'}`` rows in the module-level `table`.
        4. Read the rows back ordered by id, add one edge per row between the
           query text and the page id, and write the graph with pyvis.

      Args:
          client_config: Path to the OAuth client-secret JSON file.
          credentials: Path to the stored credentials JSON file.
          site: Key used to look the property up on the account, e.g.
              ``'sc-domain:bennis.com.tw'`` or ``'https://bennis.com.tw/'``.
          days: Day offset passed to ``query.range('today', days=...)``.
          output: HTML file name handed to ``Network.show``.

      Returns:
          None. Results are the printed progress output, the rows inserted
          into `table`, and the HTML file written by pyvis.

      Side effects:
          Rebinds the module-level `rid` to the id of the last processed row
          (unchanged when the report has no rows). Rows accumulate in `table`
          across calls because the table is never cleared here.
      """
      # Kept so the module-level `rid` is still updated exactly as before.
      global rid

      account = searchconsole.authenticate(client_config=client_config,
                                           credentials=credentials)
      site_list = account.service.sites().list().execute()
      print(site_list)

      webproperty = account[site]
      print(webproperty)
      report = (webproperty.query
                .range('today', days=days)
                .dimension('page', 'query')
                .get())
      print(report)

      # Number pages in order of first appearance: the first page seen gets
      # id 1, the next new page id 2, and so on.
      page_ids = {}
      result = []
      for row in report.rows:
          page, query = row[0], row[1]
          rid = page_ids.setdefault(page, len(page_ids) + 1)
          result.append({'page': page, 'query': query, 'rid': rid})
      print('list done')
      print(result)

      for entry in result:
          table.insert(entry)
      db.commit()
      print('db done')

      # Alternative query kept for reference: only pages with 3 to 5 queries.
      # select query as q,page as url,rid from tmp where rid in
      #   (select rid from (select rid,count(*) from tmp group by rid
      #    having count(*) > 2 and count(*) < 6) as tbl1) order by rid
      cursor = db.query('select query as q,page as url,rid from tmp order by rid ')

      # One edge per row: query text <-> page id.
      G = nx.Graph()
      for c in cursor:
          G.add_edge(c['q'], c['rid'], weight=3, width=3, borderwidth=3)
      # NOTE(review): endpoints are presumably a string and an integer, so
      # self-loops should not occur; the removal is kept as a safeguard.
      G.remove_edges_from(nx.selfloop_edges(G))

      pyG = Network(height="600px", width="100%", bgcolor="#444444",
                    font_color="white")
      pyG.from_nx(G)
      pyG.show(output)
  # Run the report as soon as the module is executed; checkig() has no
  # return statement, so `r` is always None.
  r = checkig()