Jared пре 2 година
родитељ
комит
9511fb711a
1 измењених фајлова са 110 додато и 0 уклоњено
  1. 110 0
      choozmo/gsc_tree.py

+ 110 - 0
choozmo/gsc_tree.py

@@ -0,0 +1,110 @@
+import traceback
+import dataset
+import codecs
+import sys
+import pickle
+import os
+import searchconsole
+import pandas as pd
+import networkx as nx
+#import pysftp
+import codecs
+import pyvis
+import sys
+import pickle
+import os
+import searchconsole
+from pyvis.network import Network
+import jieba
+
+
+# Earlier MySQL targets, kept for reference only.
+# NOTE(review): these connection strings embed a plaintext user/password; they
+# should not live in version control - consider removing them and rotating
+# the credentials.
+#db = dataset.connect('mysql://choozmo:pAssw0rd@127.0.0.1:3306/hhh?charset=utf8mb4')
+#db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
+
+#db.begin()
+# Scratch store: an in-memory SQLite database, so nothing persists after the run.
+db = dataset.connect('sqlite:///:memory:')
+# Table handle that checkig() inserts (page, query, rid) rows into and queries back.
+table=db['tmp']
+#table=db['gsc_page_query_year']
+#pname='korea'
+# Module-level page id; checkig() declares it global and rebinds it while numbering pages.
+rid=0
+
+def checkig():
+    global instl
+    global table
+    global pname
+    global rid
+    lst=[]
+    cntdict={}
+    codelist={}
+    idx=0
+    flag_break=False
+
+    fname=os.path.abspath(__file__)
+    elmts=fname.split(os.path.sep)
+    path2=os.path.sep.join(elmts[0:-1])
+    keysdir=path2+os.path.sep+'../keys'+os.path.sep
+
+    account = searchconsole.authenticate(client_config='c:/keys/client_secret.json',credentials='c:/keys/credentials.json')
+#    account = searchconsole.authenticate(client_config='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\client_secret.json',credentials='C:\\gitlab\\kw_tools\\kw_tools\\hhh\\keys\\credentials.json')
+    G = nx.Graph()
+
+#    webproperty = account['https://ipromise.com.tw/']
+#    webproperty = account['sc-domain:face8ook.org']
+#    webproperty = account['sc-domain:hhh.com.tw']
+#   webproperty = account['sc-domain:hhh.com.tw']
+
+#    webproperty = account['https://www.damanwoo.com/']
+    webproperty = account['https://innews.com.tw/']
+
+#    report=webproperty.query.range('2021-03-01', '2021-06-17').dimension('page','query').get()
+#    report=webproperty.query.range('2021-06-01', '2021-06-17').dimension('page','query').get()
+#    report=webproperty.query.range('2020-06-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/(491|31|293|278|31|24|594|356|307|491|33|385)', 'equals').get()
+#    report=webproperty.query.range('2020-03-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/'+pgnum, 'contains').get()
+#    report=webproperty.query.range('2020-03-01', '2021-06-22').dimension('page','query').filter('page', '/designers/cases/'+pgnum, 'contains').get()
+    report=webproperty.query.range('2022-01-01', '2022-04-16').dimension('page','query').get()
+
+    result=[]
+    rdict={}
+    total_idx=0
+
+    for r in report.rows:
+        if 'hhh.com.tw/designers/cases/' not in r[0]:
+            continue
+        if rdict.get(r[0]) is None:
+            total_idx+=1
+            rid=total_idx
+            rdict[r[0]]=rid
+        else:
+            rid=rdict[r[0]]
+        entry={'page':r[0],'query':r[1],'rid':rid}
+        result.append(entry)
+
+    print('list done')
+
+    for r in result:
+        table.insert(r)
+    db.commit()
+    print('db done')
+
+#    cursor=db.query('select query as q,page as url,rid from tmp where rid in (select rid from (select rid,count(*) from tmp group by rid having count(*) > 2 and count(*) < 6) as tbl1) order by rid ')
+    cursor=db.query('select query as q,page as url,rid from tmp order by rid ')
+
+    riddict={}
+    prev=''
+    curnode=''
+    cururl=''
+
+    total_idx=0
+    for c in cursor:
+        G.add_edge(c['q'],c['rid'],weight=3,width=3,borderwidth=3)
+    remove=[]
+    G.remove_edges_from(nx.selfloop_edges(G))
+    G2=G
+    pyG = Network(height="600px", width="100%",bgcolor="#444444",font_color="white")
+
+    pyG.from_nx(G2)
+    pyG.show('news.html')
+
+
+# Script entry: runs the export as soon as the module is executed or imported.
+# checkig() has no return statement, so r is always None.
+r=checkig()
+