import traceback import copy import operator import codecs import sys import os import searchconsole # 開發者將google search console api 包裝的套件, 方便使用, https://github.com/joshcarty/google-searchconsole import dataset import datetime import time # 取得keys位置 fname = os.path.abspath(__file__) # 取得檔案絕對路徑 elmts = fname.split(os.path.sep) # 透過路徑的sep去切割 path2 = os.path.sep.join(elmts[0:-1]) # keys資料prefix keysdir = path2 + os.path.sep+ 'keys' + os.path.sep # 連上google search console, 透過oautth account = searchconsole.authenticate(client_config=keysdir+'client_secret.json',credentials=keysdir+'credentials.json') webproperty = account['https://hhh.com.tw/'] # 連線DB db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4') #db = dataset.connect('mysql://choozmo:pAssw0rd@127.0.0.1:3306/hhh?charset=utf8mb4') db.query('delete from hhh_weekly_keywords') table = db['hhh_weekly_keywords'] records = [] def gen_report(weeknum, begindate, enddate): """ 透過searchconsole套件去搜尋 """ #report=webproperty.query.range('2021-01-01', '2020-01-02').dimension('page','query').get() report = webproperty.query.range(begindate, enddate).dimension('query').get() # 期限內的報表, 為一df ts = time.time() sessionid=int(ts) for r in report: # 將每一個row取出, 包裝成一個個dict搭配dataset套件於後續存入mysql table內。 query=r[0] clicks=int(r[1]) impressions=r[2] ctr=r[3] position=r[4] # if clicks >= 200: records.append({'sessionid':sessionid,'query':query,'clicks':clicks,'impressions':int(impressions),'ctr':float(ctr),'position':float(position),'dt':datetime.datetime.now(),'weeknum':weeknum}) # gen_report('21','2021-05-24','2020-05-30') # gen_report('22','2021-05-31','2020-06-06') # gen_report('23','2021-06-07','2020-06-13') # gen_report('24','2021-06-14','2020-06-18') # gen_report('26','2021-06-21','2021-06-25') # gen_report('27','2021-06-28','2021-07-02') # gen_report('28','2021-07-02','2021-07-09') # gen_report('29','2021-07-09','2021-07-16') # gen_report('30','2021-07-16','2021-07-23') gen_report('31','2021-07-23','2021-07-30') gen_report('32','2021-07-30','2021-08-05') print('complete') # 這邊是一筆筆去塞入, 感覺不用這樣,改成insery_many db.begin() # 開啟一個tranction, 直到db commit才真正寫入。 for r in records: try: table.insert(r) db.commit() except: db.rollback() print('done') # 觀察了一下, 一次最多一萬筆, 修正! # error: 不能一次資料太多進去 # for i in range(len(table)//10000): # table.insert_many(records[i*10000:i*10000+10000]) # print('done.')