report_to_sheet.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. #!/usr/bin/python3
  2. import sys
  3. import codecs
  4. import traceback
  5. import requests
  6. import re
  7. import pandas as pd
  8. import random
  9. import urllib
  10. import dataset
  11. import json
  12. import gspread
  13. import datetime
  14. from gspread_pandas import Spread, Client
  15. from oauth2client.service_account import ServiceAccountCredentials
  16. import os
  17. import threading
  18. def save_sheet(df,filename,tabname,startpos='A1'):
  19. scope = ['https://spreadsheets.google.com/feeds',
  20. 'https://www.googleapis.com/auth/drive']
  21. credentials = ServiceAccountCredentials.from_json_keyfile_name('c:\\keys\\spread2.json', scope)
  22. # credentials = ServiceAccountCredentials.from_json_keyfile_name('/var/keys/spread2.json', scope)
  23. gc = gspread.authorize(credentials)
  24. spread = Spread(filename,creds=credentials)
  25. spread.df_to_sheet(df, index=False, sheet=tabname, start=startpos, replace=False)
  26. def do_jobs():
  27. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
  28. # cursor=db.query("SELECT * FROM hhh.rank_details where dt > '2021-06-30 00:00:00' order by dt desc")
  29. cursor=db.query("SELECT * FROM hhh.rank_summary where dt > '2021-06-30 00:00:00' order by dt desc")
  30. win_lst={}
  31. c100_lst={}
  32. searchome_lst={}
  33. hhh_lst={}
  34. other_lst={}
  35. lost_lst={}
  36. df_records=[]
  37. for c in cursor:
  38. # print(c['designer'],c['title'],c['dt'],c['searchome'],c['c_100'],c['hhh'])
  39. title=c['title']
  40. dt=c['dt']
  41. searchome=int(c['searchome'])
  42. c_100=int(c['c_100'])
  43. hhh=int(c['hhh'])
  44. designer=c['designer']
  45. # df_records.append({'designer':designer,'title':title,'dt':dt,'searchome':searchome,'c_100':c_100,'hhh':hhh})
  46. if hhh > 0 and c_100==-1 and searchome ==-1:
  47. win_lst[designer]=hhh
  48. df_records.append({'designer':designer,'title':title,'dt':dt,'a1':'幸福空間('+str(hhh)+')','a2':'競業未進首頁','a3':'競業未進首頁'})
  49. if hhh > 0 and searchome ==-1:
  50. if c_100 !=-1 and hhh < c_100:
  51. win_lst[designer]=hhh
  52. df_records.append({'designer':designer,'title':title,'dt':dt,'a1':'幸福空間('+str(hhh)+')','a2':'100設計('+str(c_100)+')','a3':'競業未進首頁'})
  53. if c_100 !=-1 and hhh > c_100:
  54. c100_lst[designer]=1
  55. df_records.append({'designer':designer,'title':title,'dt':dt,'a1':'100設計('+str(c_100)+')','a2':'幸福空間('+str(hhh)+')','a3':'競業未進首頁'})
  56. if hhh > 0 and searchome!=-1 and searchome<hhh:
  57. if win_lst.get(designer) is None:
  58. if c100_lst.get(designer) is None:
  59. searchome_lst[designer]=searchome
  60. lost_lst[designer]=hhh
  61. df_records.append({'designer':designer,'title':title,'dt':dt,'a1':'設計家('+str(searchome)+')','a2':'幸福空間('+str(hhh)+')','a3':'競業未進首頁'})
  62. if hhh > 0 and searchome!=-1 and hhh<searchome:
  63. if win_lst.get(designer) is None:
  64. if searchome_lst.get(designer) is None:
  65. hhh_lst[designer]=hhh
  66. df_records.append({'designer':designer,'title':title,'dt':dt,'a1':'幸福空間('+str(hhh)+')','a2':'設計家('+str(searchome)+')','a3':'競業未進首頁'})
  67. if win_lst.get(designer) is None and hhh_lst.get(designer) is None and searchome_lst.get(designer) is None and c100_lst.get(designer) is None:
  68. print(designer)
  69. other_lst[designer]=1
  70. df_records.append({'designer':designer,'title':title,'dt':dt,'a1':'幸福空間未進首頁','a2':'','a3':''})
  71. print(len(win_lst.keys()))
  72. print(len(searchome_lst.keys()))
  73. print(len(hhh_lst.keys()))
  74. print(len(c100_lst.keys()))
  75. print(len(other_lst.keys()))
  76. vv=0
  77. for k,v in lost_lst.items():
  78. vv+=v
  79. ww=0
  80. for k,v in searchome_lst.items():
  81. ww+=v
  82. print(vv/len(lost_lst.items()))
  83. print(ww/len(searchome_lst.items()))
  84. print(hhh_lst)
  85. idx=0
  86. df = pd.DataFrame(columns=('designer','a1','a2','a3','dt','url'))
  87. uniq={}
  88. for r in df_records:
  89. if uniq.get(r['designer']) is None:
  90. uniq[r['designer']]=1
  91. else:
  92. continue
  93. # if r['searchome']==-1:
  94. # r['searchome']='未進首頁'
  95. # if r['c_100']==-1:
  96. # r['c_100']='未進首頁'
  97. # if r['hhh']==-1:
  98. # r['hhh']='未進首頁'
  99. df.loc[idx]=[r['designer'],r['a1'],r['a2'],r['a3'],r['dt'],'https://www.google.com/search?q='+r['designer']]
  100. # df.loc[idx]=['okok',333]
  101. idx+=1
  102. # idx=0
  103. # for c in cursor:
  104. # df.loc[idx]=[c['query'],c['users'],c['newusers'],c['pageviews'],c['bouncerate'],c['pvpersession'],c['url']]
  105. # df.loc[idx]=['okok',333]
  106. # idx+=1
  107. save_sheet(df,'SEO_designer_results','ranking')
  108. # save_sheet(df,'ga_x_gsc','with_url')
  109. t = threading.Thread(target = do_jobs)
  110. t.start()
  111. t.join()