- import traceback
- import dataset
- import codecs
- import sys
- import pickle
- import os
- import searchconsole
- import pandas as pd
- import networkx as nx
- #import pysftp
- import codecs
- import pyvis
- import sys
- import pickle
- import os
- import searchconsole
- from pyvis.network import Network
- import jieba
# --- module-level state --------------------------------------------------
# Scratch storage for the Search Console rows. Earlier revisions pointed at
# a MySQL backend:
#   dataset.connect('mysql://choozmo:pAssw0rd@127.0.0.1:3306/hhh?charset=utf8mb4')
#   dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
# now an in-memory SQLite database is used instead.
db = dataset.connect('sqlite:///:memory:')

# Working table for the raw (page, query, rid) rows.
# Alternative persistent table: db['gsc_page_query_year']
table = db['tmp']

# Last page id assigned by checkig(); module-level so the function can
# update it via `global rid`.
rid = 0
def checkig():
    """Export Search Console page/query pairs and render them as a graph.

    Pulls the 2022-01-01..2022-04-16 page/query report for the
    https://innews.com.tw/ property, keeps only rows whose page URL
    contains 'hhh.com.tw/designers/cases/', assigns each distinct page a
    sequential integer id (rid), inserts the rows into the module-level
    `tmp` table, and writes a query<->page-id graph to news.html.

    Returns None; all results are side effects (DB inserts, HTML file).
    """
    global table
    global rid

    # OAuth against Google Search Console.
    # NOTE(review): credential paths are hard-coded to c:/keys — they must
    # exist on the machine running this script.
    account = searchconsole.authenticate(
        client_config='c:/keys/client_secret.json',
        credentials='c:/keys/credentials.json',
    )
    webproperty = account['https://innews.com.tw/']
    report = (webproperty.query
              .range('2022-01-01', '2022-04-16')
              .dimension('page', 'query')
              .get())

    # Assign a stable integer id (rid) to each distinct page URL, keeping
    # only designer-case pages; collect one row per (page, query) pair.
    result = []
    rdict = {}
    total_idx = 0
    for r in report.rows:
        if 'hhh.com.tw/designers/cases/' not in r[0]:
            continue
        if r[0] not in rdict:
            total_idx += 1
            rdict[r[0]] = total_idx
        rid = rdict[r[0]]
        result.append({'page': r[0], 'query': r[1], 'rid': rid})
    print('list done')

    # Persist the rows so the graph query below can read them back.
    for row in result:
        table.insert(row)
    db.commit()
    print('db done')

    # Build the graph: one edge per (query string, page id) pair.
    G = nx.Graph()
    cursor = db.query('select query as q,page as url,rid from tmp order by rid ')
    for c in cursor:
        G.add_edge(c['q'], c['rid'], weight=3, width=3, borderwidth=3)
    # A query string equal to its rid would create a self-loop; drop those.
    G.remove_edges_from(nx.selfloop_edges(G))

    # Render with pyvis and write the interactive HTML page.
    pyG = Network(height="600px", width="100%",
                  bgcolor="#444444", font_color="white")
    pyG.from_nx(G)
    pyG.show('news.html')
# Kick off the export immediately on import/run (checkig returns None).
r = checkig()