123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110 |
- import traceback
- import dataset
- import codecs
- import sys
- import pickle
- import os
- import searchconsole
- import pandas as pd
- import networkx as nx
- import codecs
- import pyvis
- import sys
- import pickle
- import os
- import searchconsole
- from pyvis.network import Network
- import jieba
# Scratch database: an in-memory SQLite instance, so nothing persists
# between runs of this script.
db = dataset.connect('sqlite:///:memory:')

# Staging table that checkig() fills with one row per (page, query) pair.
table = db['tmp']

# Most recent page id handed out by checkig(); stays 0 until it runs.
rid = 0
def checkig(
    site_url='https://innews.com.tw/',
    start_date='2022-01-01',
    end_date='2022-04-16',
    page_filter='hhh.com.tw/designers/cases/',
    output_html='news.html',
    client_config='c:/keys/client_secret.json',
    credentials='c:/keys/credentials.json',
):
    """Render a query-to-page graph from Search Console data as an HTML file.

    Steps, in order:

    1. Authenticate against Search Console and fetch a report with the
       ``page`` and ``query`` dimensions for the given date range.
    2. Keep only rows whose page URL contains ``page_filter`` and give each
       distinct page a sequential integer id (1, 2, 3, ...) in order of
       first appearance.
    3. Insert the kept rows into the module-level ``table`` and read them
       back ordered by page id.
    4. Build an undirected graph with one edge per (query, page id) pair
       and write it to ``output_html`` through pyvis.

    Args:
        site_url: Key used to select the web property from the account.
        start_date: First day of the report range, passed to ``range()``.
        end_date: Last day of the report range, passed to ``range()``.
        page_filter: Substring a page URL must contain for its row to be kept.
        output_html: File name handed to ``Network.show``.
        client_config: Path to the OAuth client secret file.
        credentials: Path to the stored credentials file.

    Returns:
        None.

    Side effects:
        Rebinds the module-level ``rid`` to the id of the last kept row,
        inserts rows into the module-level ``table``, prints two progress
        messages and writes ``output_html``.
    """
    global rid

    account = searchconsole.authenticate(
        client_config=client_config,
        credentials=credentials,
    )
    webproperty = account[site_url]
    report = (
        webproperty.query
        .range(start_date, end_date)
        .dimension('page', 'query')
        .get()
    )

    # NOTE(review): the default filter names hhh.com.tw while the default
    # property is innews.com.tw; if no row matches, nothing is inserted and
    # the graph may be empty -- confirm the filter/property pairing.
    page_ids = {}
    result = []
    for row in report.rows:
        page = row[0]
        if page_filter not in page:
            continue
        # First sighting of a page gets the next sequential id; later rows
        # for the same page reuse it.
        rid = page_ids.setdefault(page, len(page_ids) + 1)
        result.append({'page': page, 'query': row[1], 'rid': rid})
    print('list done')

    for entry in result:
        table.insert(entry)
    db.commit()
    print('db done')

    cursor = db.query('select query as q,page as url,rid from tmp order by rid ')

    G = nx.Graph()
    for c in cursor:
        # Query strings and integer page ids share one node namespace.
        G.add_edge(c['q'], c['rid'], weight=3, width=3, borderwidth=3)
    G.remove_edges_from(nx.selfloop_edges(G))

    pyG = Network(height="600px", width="100%", bgcolor="#444444", font_color="white")
    pyG.from_nx(G)
    pyG.show(output_html)
# Run the report as soon as the module is executed; checkig() has no
# return statement, so r is None.
r = checkig()
|