|
@@ -0,0 +1,110 @@
|
|
|
+import traceback
|
|
|
+import dataset
|
|
|
+import codecs
|
|
|
+import sys
|
|
|
+import pickle
|
|
|
+import os
|
|
|
+import searchconsole
|
|
|
+import pandas as pd
|
|
|
+import networkx as nx
|
|
|
+#import pysftp
|
|
|
+import codecs
|
|
|
+import pyvis
|
|
|
+import sys
|
|
|
+import pickle
|
|
|
+import os
|
|
|
+import searchconsole
|
|
|
+from pyvis.network import Network
|
|
|
+import jieba
|
|
|
+
|
|
|
+
|
|
|
# Scratch storage for one run: an in-memory SQLite database, so nothing is
# persisted once the process exits.
db = dataset.connect('sqlite:///:memory:')

# Table that checkig() fills with one {'page', 'query', 'rid'} row per
# Search Console result it keeps.
table = db['tmp']

# Numeric id of the page most recently handled by checkig(); 0 until it runs.
rid = 0
|
|
|
+
|
|
|
def checkig(site_url='https://innews.com.tw/',
            start_date='2022-01-01',
            end_date='2022-04-16',
            page_filter='hhh.com.tw/designers/cases/',
            output_html='news.html',
            client_config='c:/keys/client_secret.json',
            credentials='c:/keys/credentials.json'):
    """Render a query/page graph from Search Console data as an HTML file.

    Fetches the (page, query) report for ``site_url`` over the given date
    range, keeps the rows whose page URL contains ``page_filter``, stores
    them in the module-level ``tmp`` table, links every query to the numeric
    id of its page in an undirected graph, and writes that graph to
    ``output_html`` through pyvis.

    Args:
        site_url: Search Console property key used to index the account.
        start_date: First day of the report range, ``YYYY-MM-DD``.
        end_date: Last day of the report range, ``YYYY-MM-DD``.
        page_filter: Substring a page URL must contain for its rows to be
            kept.
        output_html: Path of the HTML file produced by pyvis.
        client_config: Path to the OAuth client secret JSON file.
        credentials: Path to the stored OAuth credentials JSON file.

    Returns:
        None. The results are the rows inserted into ``tmp``, the updated
        module-level ``rid`` and the generated HTML file.
    """
    # The module-level ``rid`` is kept in sync with the id of the last page
    # processed, as before, in case other code reads it.
    global rid

    account = searchconsole.authenticate(client_config=client_config,
                                         credentials=credentials)
    webproperty = account[site_url]
    report = (webproperty.query
              .range(start_date, end_date)
              .dimension('page', 'query')
              .get())

    # NOTE(review): the default page_filter targets hhh.com.tw while the
    # default site_url is innews.com.tw; with these defaults it looks like
    # every row would be skipped. Confirm which filter is intended.
    page_ids = {}   # page URL -> 1-based id, in order of first appearance
    entries = []
    for row in report.rows:
        page, query = row[0], row[1]
        if page_filter not in page:
            continue
        # The default is evaluated before insertion, so a new page gets the
        # next sequential id and a known page keeps its existing one.
        rid = page_ids.setdefault(page, len(page_ids) + 1)
        entries.append({'page': page, 'query': query, 'rid': rid})

    print('list done')

    if not entries:
        # Nothing was kept, so nothing would be inserted and the SELECT
        # below would have no rows or columns to read. Stop here instead of
        # failing inside the database layer.
        print('no rows matched page filter: ' + page_filter)
        return None

    # One batched insert instead of one statement per row.
    table.insert_many(entries)
    db.commit()
    print('db done')

    graph = nx.Graph()
    rows = db.query('select query as q,page as url,rid from tmp order by rid ')
    for rec in rows:
        # Edge between the query text and the numeric id of its page.
        graph.add_edge(rec['q'], rec['rid'], weight=3, width=3, borderwidth=3)

    # Materialise the iterator first so edges are not removed while the
    # graph is still being traversed.
    graph.remove_edges_from(list(nx.selfloop_edges(graph)))

    net = Network(height="600px", width="100%", bgcolor="#444444",
                  font_color="white")
    net.from_nx(graph)
    net.show(output_html)
    return None
|
|
|
+
|
|
|
+
|
|
|
# Script entry: runs the full fetch/store/render pipeline as soon as the
# module is executed or imported. checkig() has no return statement with a
# value, so ``r`` is always None.
r = checkig()
|
|
|
+
|