seo_monitor.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. #!/usr/bin/python
  2. # -*- coding: UTF-8 -*-
  3. import pandas as pd
  4. from datetime import datetime
  5. import pymysql
  6. pymysql.install_as_MySQLdb()
  7. import dataset
  8. import requests
  9. import gspread
  10. from gspread_pandas import Spread, Client
  11. from oauth2client.service_account import ServiceAccountCredentials
  12. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  13. data = pd.DataFrame()
  14. scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']
  15. credentials = ServiceAccountCredentials.from_json_keyfile_name("choozmointernal-2e314f3d4e12.json", scope)
  16. gc = gspread.authorize(credentials)
  17. def notify_group(msg):
  18. gid='WekCRfnAirSiSxALiD6gcm0B56EejsoK89zFbIaiZQD'
  19. headers = {"Authorization": "Bearer " + gid,"Content-Type": "application/x-www-form-urlencoded"}
  20. r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params={"message": msg})
  21. def download(client, date, newest=True):
  22. """關鍵字清單"""
  23. lst_kw = []
  24. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  25. cursor_kw = db.query(f"SELECT term FROM seo.selected_kw where client='{client}'")
  26. for c in cursor_kw:
  27. for i in c.values():
  28. lst_kw.append(i)
  29. print(len(lst_kw))
  30. """抓取general_log關鍵字"""
  31. last_check_lst = []
  32. for i in lst_kw:
  33. if newest:
  34. cursor_last_check = db.query(f"select kw,ranking from seo.general_log where kw='{i}' order by dt desc limit 1")
  35. else:
  36. cursor_last_check = db.query(f"select kw,ranking from seo.general_log where kw='{i}' order by dt limit 1")
  37. group = []
  38. n = 0
  39. for c in cursor_last_check:
  40. for j in c.values():
  41. if 'site' in str(j):
  42. n+=1
  43. if n==1 and j==1:
  44. group.append(101)
  45. else:
  46. group.append(j)
  47. n = 0
  48. last_check_lst.append(group)
  49. # df2 = pd.DataFrame(not_match_kw_lst,columns=['關鍵字組','搜尋排名'])
  50. df3 = pd.DataFrame(last_check_lst, columns=['關鍵字組', '搜尋排名'])
  51. # df_all = pd.concat([df, df3])
  52. df3.to_csv(f"./排名CSV/{client}當日排名{date}.csv", index=False, encoding='utf_8_sig')
  53. db.close()
  54. def combine_df(client, date1, date2):
  55. df = pd.read_csv(f'./排名CSV/{client}當日排名{date1}.csv')
  56. df2 = pd.read_csv(f'./排名CSV/{client}當日排名{date2}.csv')
  57. df_result = pd.merge(df, df2, on='關鍵字組', how='outer').fillna(0)
  58. date1 = arrange_date(date1)
  59. date2 = arrange_date(date2)
  60. df_result.columns = ['kw', 'starting_rank', 'current_rank']
  61. p = df_result['starting_rank'].tolist()
  62. l = df_result['current_rank'].tolist()
  63. c = [u"\u2197 "+str(abs(int(i)-int(j))) if int(i)-int(j)>0 \
  64. else u"\u2198 "+str(abs(int(i)-int(j))) if int(i)-int(j)<0 \
  65. else u"\u2192 "+str(abs(int(i)-int(j))) for (i, j) in zip(p, l)]
  66. df_result['client'] = client
  67. df_result['change'] = c
  68. df_result['kw'] = df_result['kw'].str.split('site',0).str[0]
  69. df_result.to_csv(f"./排名CSV/combine/{client}排名變化.csv", index=False, encoding='utf_8_sig')
  70. return df_result, date1, date2
  71. def arrange_date(date):
  72. if date[0] == 0 :
  73. date = date[1]+'/'+date[2:]
  74. else:
  75. date = date[:2]+'/'+date[2:]
  76. return date
  77. def checkrank(data, targetcust, targetkw):
  78. '''
  79. msg = "以下是排名下降的關鍵字:\n"
  80. msg2 = "以下是目前排名於百名外的關鍵字:\n"
  81. '''
  82. msg = "以下是亮點字的績效:\n"
  83. flag1=0
  84. flag2=0
  85. for row in data.index:
  86. #print(row)
  87. delta = int(data['starting_rank'][row])-int(data['current_rank'][row])
  88. for t in targetkw:
  89. try:
  90. w = data['kw'][row]
  91. if ((t in w) and data['client'][row] in targetcust):
  92. msg += "[" + data['client'][row] + "] " + data['kw'][row] + ": " + str(int(data['starting_rank'][row])) + " -> " + str(int(data['current_rank'][row])) + " (" + str(int(delta)) + ")\n"
  93. except:
  94. pass
  95. '''
  96. if (delta < 0):
  97. msg += "[" + data['client'][row] + "] " + data['kw'][row] + ": " + str(int(data['starting_rank'][row])) + " -> " + str(int(data['current_rank'][row])) + " (" + str(int(delta)) + ")\n"
  98. flag1=1
  99. if (int(data['current_rank'][row]) == 101): #out of range...
  100. msg2 += "[" + data['client'][row] + "] " + data['kw'][row] + "\n"
  101. flag2=1
  102. '''
  103. print(msg)
  104. #print(msg2)
  105. return msg
  106. #return msg, msg2, flag1, flag2
  107. if __name__ == "__main__":
  108. date1 = '0707'
  109. date2 = '0717'
  110. lst = ['清原', '神助物流', '班尼斯', '有夠讚', '仁本', '火柴星人', '呷茶', 'ChoozMo']
  111. sheet = gc.open('SEO 亮點字')
  112. sheet_instance = sheet.get_worksheet(0)
  113. records_data = sheet_instance.get_all_records()
  114. #print(records_data[0])
  115. targetkw = []
  116. for rd in records_data:
  117. targetkw.append(rd["亮點字"])
  118. print(targetkw)
  119. for client in lst:
  120. date1 = '0707'
  121. date2 = '0718'
  122. download(client, date1, newest=False)
  123. download(client, date2, newest=True)
  124. res, d1, d2 = combine_df(client, date1, date2)
  125. data = data.append(res, ignore_index=True)
  126. data.to_csv(f"./排名CSV/combine/__排名變化.csv", index=False, encoding='utf_8_sig')
  127. spread = Spread("kw_ranking_tracker",creds=credentials)
  128. spread.df_to_sheet(data, index=False, sheet="main", start="A1", replace=False)
  129. time_stamp = datetime.now()
  130. time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M:%S")
  131. spread.sheet.update('G1', '最後更新:' + time_stamp)
  132. #msg, msg2, flag1, flag2=checkrank(data, lst, targetkw)
  133. msg = checkrank(data, lst, targetkw)
  134. msg += "\n完整資料請至https://cmm.ai/seo1"
  135. #msg2 += "\n完整資料請至https://cmm.ai/seo1"
  136. msg01 = [msg[i:i+1000] for i in range(0, len(msg), 1000)]
  137. for m1 in msg01:
  138. notify_group(m1)
  139. print(m1)
  140. '''
  141. if flag1 == 1:
  142. msg01 = [msg[i:i+1000] for i in range(0, len(msg), 1000)]
  143. for m1 in msg01:
  144. notify_group(m1)
  145. print(m1)
  146. if flag2 == 1:
  147. msg02 = [msg2[i:i+1000] for i in range(0, len(msg2), 1000)]
  148. for m2 in msg02:
  149. notify_group(m2)
  150. print(m2)
  151. '''