1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- import traceback
- import copy
- import operator
- import codecs
- import sys
- import os
- import searchconsole # 開發者將google search console api 包裝的套件, 方便使用, https://github.com/joshcarty/google-searchconsole
- import dataset
- import datetime
- import time
- # 取得keys位置
- fname = os.path.abspath(__file__) # 取得檔案絕對路徑
- elmts = fname.split(os.path.sep) # 透過路徑的sep去切割
- path2 = os.path.sep.join(elmts[0:-1]) # keys資料prefix
- keysdir = path2 + os.path.sep+ 'keys' + os.path.sep
- # 連上google search console, 透過oautth
- account = searchconsole.authenticate(client_config=keysdir+'client_secret.json',credentials=keysdir+'credentials.json')
- webproperty = account['https://hhh.com.tw/']
- # 連線DB
- db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
- #db = dataset.connect('mysql://choozmo:pAssw0rd@127.0.0.1:3306/hhh?charset=utf8mb4')
- db.query('delete from hhh_weekly_keywords')
- table = db['hhh_weekly_keywords']
- records = []
- def gen_report(weeknum, begindate, enddate):
- """ 透過searchconsole套件去搜尋 """
- #report=webproperty.query.range('2021-01-01', '2020-01-02').dimension('page','query').get()
- report = webproperty.query.range(begindate, enddate).dimension('query').get() # 期限內的報表, 為一df
- ts = time.time()
- sessionid=int(ts)
- for r in report:
- # 將每一個row取出, 包裝成一個個dict搭配dataset套件於後續存入mysql table內。
- query=r[0]
- clicks=int(r[1])
- impressions=r[2]
- ctr=r[3]
- position=r[4]
- # if clicks >= 200:
- records.append({'sessionid':sessionid,'query':query,'clicks':clicks,'impressions':int(impressions),'ctr':float(ctr),'position':float(position),'dt':datetime.datetime.now(),'weeknum':weeknum})
- # gen_report('21','2021-05-24','2020-05-30')
- # gen_report('22','2021-05-31','2020-06-06')
- # gen_report('23','2021-06-07','2020-06-13')
- # gen_report('24','2021-06-14','2020-06-18')
- # gen_report('26','2021-06-21','2021-06-25')
- # gen_report('27','2021-06-28','2021-07-02')
- # gen_report('28','2021-07-02','2021-07-09')
- # gen_report('29','2021-07-09','2021-07-16')
- # gen_report('30','2021-07-16','2021-07-23')
- gen_report('31','2021-07-23','2021-07-30')
- gen_report('32','2021-07-30','2021-08-05')
- print('complete')
- # 這邊是一筆筆去塞入, 感覺不用這樣,改成insery_many
- db.begin() # 開啟一個tranction, 直到db commit才真正寫入。
- for r in records:
- try:
- table.insert(r)
- db.commit()
- except:
- db.rollback()
- print('done')
- # 觀察了一下, 一次最多一萬筆, 修正!
- # error: 不能一次資料太多進去
- # for i in range(len(table)//10000):
- # table.insert_many(records[i*10000:i*10000+10000])
- # print('done.')
|