"""Pull Google Search Console page/query data, group pages by id, and
render a query <-> page-id relationship graph to 'news.html' via pyvis."""

import codecs
import os
import pickle
import sys
import traceback

import dataset
import jieba
import networkx as nx
import pandas as pd
import pyvis
import searchconsole
from pyvis.network import Network

# Scratch in-memory SQLite DB: rows are staged here so pages can be
# grouped/ordered by their assigned numeric id (rid) with plain SQL.
db = dataset.connect('sqlite:///:memory:')
table = db['tmp']

rid = 0


def checkig():
    """Fetch Search Console rows, assign each distinct page a numeric id,
    stage the rows in the temp table, and render a query<->page-id graph.

    Side effects: inserts into the module-level ``tmp`` table, updates the
    module-level ``rid`` counter, and writes ``news.html`` to the CWD.

    Returns:
        The populated ``networkx.Graph`` (previously the function returned
        ``None`` even though the caller captured the result).
    """
    global table
    global rid

    # NOTE(review): hard-coded Windows key paths — confirm these files
    # exist on the machine running this script.
    account = searchconsole.authenticate(
        client_config='c:/keys/client_secret.json',
        credentials='c:/keys/credentials.json',
    )

    G = nx.Graph()

    webproperty = account['https://innews.com.tw/']

    report = (webproperty.query
              .range('2022-01-01', '2022-04-16')
              .dimension('page', 'query')
              .get())

    # Assign a stable integer id (rid) to each distinct page URL and
    # collect {page, query, rid} rows for staging.
    # NOTE(review): the filter keeps only hhh.com.tw URLs while the
    # property queried is innews.com.tw — verify this is intentional.
    result = []
    rdict = {}
    total_idx = 0
    for r in report.rows:
        if 'hhh.com.tw/designers/cases/' not in r[0]:
            continue
        if rdict.get(r[0]) is None:
            total_idx += 1
            rid = total_idx
            rdict[r[0]] = rid
        else:
            rid = rdict[r[0]]
        result.append({'page': r[0], 'query': r[1], 'rid': rid})
    print('list done')

    for entry in result:
        table.insert(entry)
    db.commit()
    print('db done')

    # One edge per (query, page-id) pair; ordering by rid keeps pages grouped.
    cursor = db.query('select query as q,page as url,rid from tmp order by rid ')
    for c in cursor:
        G.add_edge(c['q'], c['rid'], weight=3, width=3, borderwidth=3)

    # A query identical to its rid value would create a self-loop; drop them.
    G.remove_edges_from(nx.selfloop_edges(G))

    pyG = Network(height="600px", width="100%",
                  bgcolor="#444444", font_color="white")
    pyG.from_nx(G)
    pyG.show('news.html')
    return G


if __name__ == '__main__':
    r = checkig()