ga_key.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. import traceback
  2. import copy
  3. import operator
  4. import codecs
  5. import sys
  6. import os
  7. import searchconsole # 開發者將google search console api 包裝的套件, 方便使用, https://github.com/joshcarty/google-searchconsole
  8. import dataset
  9. import datetime
  10. import time
  11. # 取得keys位置
  12. fname = os.path.abspath(__file__) # 取得檔案絕對路徑
  13. elmts = fname.split(os.path.sep) # 透過路徑的sep去切割
  14. path2 = os.path.sep.join(elmts[0:-1]) # keys資料prefix
  15. keysdir = path2 + os.path.sep+ 'keys' + os.path.sep
  16. # 連上google search console, 透過oautth
  17. account = searchconsole.authenticate(client_config=keysdir+'client_secret.json',credentials=keysdir+'credentials.json')
  18. webproperty = account['https://hhh.com.tw/']
  19. # 連線DB
  20. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
  21. #db = dataset.connect('mysql://choozmo:pAssw0rd@127.0.0.1:3306/hhh?charset=utf8mb4')
  22. db.query('delete from hhh_weekly_keywords')
  23. table = db['hhh_weekly_keywords']
  24. records = []
  25. def gen_report(weeknum, begindate, enddate):
  26. """ 透過searchconsole套件去搜尋 """
  27. #report=webproperty.query.range('2021-01-01', '2020-01-02').dimension('page','query').get()
  28. report = webproperty.query.range(begindate, enddate).dimension('query').get() # 期限內的報表, 為一df
  29. ts = time.time()
  30. sessionid=int(ts)
  31. for r in report:
  32. # 將每一個row取出, 包裝成一個個dict搭配dataset套件於後續存入mysql table內。
  33. query=r[0]
  34. clicks=int(r[1])
  35. impressions=r[2]
  36. ctr=r[3]
  37. position=r[4]
  38. # if clicks >= 200:
  39. records.append({'sessionid':sessionid,'query':query,'clicks':clicks,'impressions':int(impressions),'ctr':float(ctr),'position':float(position),'dt':datetime.datetime.now(),'weeknum':weeknum})
  40. # gen_report('21','2021-05-24','2020-05-30')
  41. # gen_report('22','2021-05-31','2020-06-06')
  42. # gen_report('23','2021-06-07','2020-06-13')
  43. # gen_report('24','2021-06-14','2020-06-18')
  44. # gen_report('26','2021-06-21','2021-06-25')
  45. # gen_report('27','2021-06-28','2021-07-02')
  46. # gen_report('28','2021-07-02','2021-07-09')
  47. # gen_report('29','2021-07-09','2021-07-16')
  48. # gen_report('30','2021-07-16','2021-07-23')
  49. gen_report('31','2021-07-23','2021-07-30')
  50. gen_report('32','2021-07-30','2021-08-05')
  51. print('complete')
  52. # 這邊是一筆筆去塞入, 感覺不用這樣,改成insery_many
  53. db.begin() # 開啟一個tranction, 直到db commit才真正寫入。
  54. for r in records:
  55. try:
  56. table.insert(r)
  57. db.commit()
  58. except:
  59. db.rollback()
  60. print('done')
  61. # 觀察了一下, 一次最多一萬筆, 修正!
  62. # error: 不能一次資料太多進去
  63. # for i in range(len(table)//10000):
  64. # table.insert_many(records[i*10000:i*10000+10000])
  65. # print('done.')