@@ -1,110 +1,84 @@
-import traceback
+import searchconsole
 import dataset
-import codecs
-import sys
-import pickle
 import os
-import searchconsole
-import pandas as pd
 import networkx as nx
-#import pysftp
-import codecs
-import pyvis
-import sys
-import pickle
-import os
-import searchconsole
 from pyvis.network import Network
-import jieba
-
-
-#db = dataset.connect('mysql://choozmo:pAssw0rd@127.0.0.1:3306/hhh?charset=utf8mb4')
-#db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
+import sqlite3
 
-#db.begin()
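+# Stage the Search Console rows in an in-memory SQLite database (via dataset) so they can be re-queried with SQL below.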
 db = dataset.connect('sqlite:///:memory:')
-table=db['tmp']
-#table=db['gsc_page_query_year']
-#pname='korea'
-rid=0
+table = db['tmp']
+# table=db['gsc_page_query_year']
+# pname='korea'
+rid = 0
+
 
 def checkig():
     global instl
     global table
     global pname
     global rid
-    lst=[]
-    cntdict={}
-    codelist={}
-    idx=0
-    flag_break=False
-
-    fname=os.path.abspath(__file__)
-    elmts=fname.split(os.path.sep)
-    path2=os.path.sep.join(elmts[0:-1])
-    keysdir=path2+os.path.sep+'../keys'+os.path.sep
-
-    account = searchconsole.authenticate(client_config='c:/keys/client_secret.json',credentials='c:/keys/credentials.json')
-# account = searchconsole.authenticate(client_config='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\client_secret.json',credentials='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\credentials.json')
+    lst = []
+    cntdict = {}
+    codelist = {}
+    idx = 0
+    flag_break = False
+
+    fname = os.path.abspath(__file__)
+    elmts = fname.split(os.path.sep)
+    path2 = os.path.sep.join(elmts[0:-1])
+    keysdir = path2 + os.path.sep + '../keys' + os.path.sep
+
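+    # Authenticate to the Google Search Console API with the OAuth client secret and cached credentials under c:/keys.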
+    account = searchconsole.authenticate(client_config='c:/keys/client_secret.json',
+                                         credentials='c:/keys/credentials.json')
+    # webproperty = account['sc-domain:www.yogoclean.com']
+    webproperty = account['https://www.yogoclean.com/']
+    print(webproperty)
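+    # Pull page/query pairs for the six-month window ending today.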
+    report = webproperty.query.range('today', months=-6).dimension('page', 'query').get()
+    print(report)
+    # report = webproperty.query.range('2022-05-28', '2022-05-29').dimension('page', 'query').get()
+    # account = searchconsole.authenticate(client_config='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\client_secret.json',credentials='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\credentials.json')
     G = nx.Graph()
-
-# webproperty = account['https://ipromise.com.tw/']
-# webproperty = account['sc-domain:face8ook.org']
-# webproperty = account['sc-domain:hhh.com.tw']
-# webproperty = account['sc-domain:hhh.com.tw']
-
-# webproperty = account['https://www.damanwoo.com/']
-    webproperty = account['https://innews.com.tw/']
-
-# report=webproperty.query.range('2021-03-01', '2021-06-17').dimension('page','query').get()
-# report=webproperty.query.range('2021-06-01', '2021-06-17').dimension('page','query').get()
-# report=webproperty.query.range('2020-06-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/(491|31|293|278|31|24|594|356|307|491|33|385)', 'equals').get()
-# report=webproperty.query.range('2020-03-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/'+pgnum, 'contains').get()
-# report=webproperty.query.range('2020-03-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/'+pgnum, 'contains').get()
-    report=webproperty.query.range('2022-01-01', '2022-04-16').dimension('page','query').get()
-
-    result=[]
-    rdict={}
-    total_idx=0
-
+
+    result = []
+    rdict = {}
+    total_idx = 0
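+    # Assign each page URL a numeric id (rid) so pages and their queries can be linked as graph nodes.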
     for r in report.rows:
-        if 'hhh.com.tw/designers/cases/' not in r[0]:
-            continue
+        print(r)
+        print(rdict)
         if rdict.get(r[0]) is None:
-            total_idx+=1
-            rid=total_idx
-            rdict[r[0]]=rid
+            total_idx += 1
+            rid = total_idx
+            rdict[r[0]] = rid
         else:
-            rid=rdict[r[0]]
-        entry={'page':r[0],'query':r[1],'rid':rid}
+            rid = rdict[r[0]]
+        entry = {'page': r[0], 'query': r[1], 'rid': rid}
         result.append(entry)
-
+
     print('list done')
-
     for r in result:
         table.insert(r)
     db.commit()
     print('db done')
-
-# cursor=db.query('select query as q,page as url,rid from tmp where rid in (select rid from (select rid,count(*) from tmp group by rid having count(*) > 2 and count(*) < 6) as tbl1) order by rid ')
-    cursor=db.query('select query as q,page as url,rid from tmp order by rid ')
-
-    riddict={}
-    prev=''
-    curnode=''
-    cururl=''
-
-    total_idx=0
-    for c in cursor:
-        G.add_edge(c['q'],c['rid'],weight=3,width=3,borderwidth=3)
-    remove=[]
+
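+    # Keep only pages that have 3-5 query rows, then connect each query to its page id in the graph.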
+    cursor = db.query(
+        'select query as q,page as url,rid from tmp where rid in (select rid from (select rid,count(*) from tmp group by rid having count(*) > 2 and count(*) < 6) as tbl1) order by rid ')
+    # select_data = cur.execute('select q,url,rid from tmp order by rid ')
+
+    riddict = {}
+    prev = ''
+    curnode = ''
+    cururl = ''
+
+    total_idx = 0
+    for data in cursor:
+        # dataset yields dict-like rows, so access the aliased columns by name.
+        G.add_edge(data['q'], data['rid'], weight=3, width=3, borderwidth=3)
+    remove = []
     G.remove_edges_from(nx.selfloop_edges(G))
-    G2=G
-    pyG = Network(height="600px", width="100%",bgcolor="#444444",font_color="white")
-
+    G2 = G
+    pyG = Network(height="600px", width="100%", bgcolor="#444444", font_color="white")
+
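+    # Render the NetworkX graph as an interactive pyvis HTML page.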
     pyG.from_nx(G2)
-    pyG.show('news.html')
-
+    pyG.show('news15.html')
 
-r=checkig()
+r = checkig()