ranking_world.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. # import redis
  2. import time
  3. import traceback
  4. # import json
  5. from selenium import webdriver
  6. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  7. import time
  8. import os
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import dataset
  13. from selenium.webdriver.common.keys import Keys
  14. import json
  15. import random
  16. import time
  17. import sys
  18. import codecs
  19. import pandas as pd
  20. import random
  21. import os
  22. import time
  23. import datetime
  24. from selenium.webdriver.chrome.service import Service
  25. import dataset
  26. import pymysql
  27. import undetected_chromedriver as uc
  28. pymysql.install_as_MySQLdb()
  29. import requests
  30. driver = None
  31. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  32. headers = {
  33. "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
  34. "Content-Type": "application/x-www-form-urlencoded"
  35. }
  36. def send_msg(kw):
  37. params = {"message": kw}
  38. r = requests.post("https://notify-api.line.me/api/notify", headers=headers, params=params)
  39. def re_get_webdriver():
  40. global port
  41. global driver
  42. result = []
  43. if driver is not None:
  44. print('closing....')
  45. driver.quit()
  46. os.system('killall chrome')
  47. print('quit....')
  48. driver = None
  49. try:
  50. s = Service('C:\/Users\/s1301\/Downloads\/136\/chromedriver-win32\/chromedriver.exe')
  51. options = uc.ChromeOptions()
  52. options.add_argument("--window-size=200,100") # 縮小視窗
  53. options.add_argument("--window-position=-32000,-32000") # 移到螢幕外
  54. driver = uc.Chrome(options=options,service=s)
  55. driver.delete_all_cookies()
  56. return driver
  57. except:
  58. traceback.print_exc()
  59. driver = None
  60. return None
  61. def scrolling(driver, pgnum):
  62. ub = driver.find_element("css selector",'body')
  63. for i in range(pgnum):
  64. ub.send_keys(Keys.PAGE_DOWN)
  65. if pgnum > 1:
  66. time.sleep(0.3)
  67. def run_once(jsobj):
  68. table = db['google_rank']
  69. date = jsobj['date']
  70. print(jsobj)
  71. global driver
  72. i = 100
  73. if driver is None:
  74. time.sleep(8)
  75. re_get_webdriver()
  76. if driver is None:
  77. return
  78. try:
  79. kw = jsobj['kw']
  80. fname = jsobj['fname']
  81. url = jsobj['url']
  82. # if jsobj.get('domain') is None:
  83. # exclude = jsobj['exclude']
  84. # domain = None
  85. # else:
  86. # domain = jsobj['domain']
  87. # exclude = None
  88. city_map = {'chicago': ['42.04866173771369', '-87.68260072643513'],
  89. 'miami': ['25.764458843530548', '-80.19787522585152'],
  90. 'wc': ['38.96071674051165', '-77.03155367248387'],
  91. 'ny': ['40.76774141099703', '-73.98439238945637']}
  92. city = jsobj['fname']
  93. print(city)
  94. # Map_coordinates = dict({
  95. # "latitude": float(city_map[f"{city}"][0]),
  96. # "longitude": float(city_map[f"{city}"][1]),
  97. # "accuracy": 100
  98. # })
  99. # 芝加哥、邁阿密、紐約、華盛頓
  100. # driver.execute_cdp_cmd("Emulation.setGeolocationOverride", Map_coordinates)
  101. # driver.get('https://www.google.com?num=100&lr=lang_en')
  102. driver.get(url)
  103. # print(driver.current_url)
  104. # elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  105. # time.sleep(1)
  106. # elmt.send_keys(kw)
  107. #
  108. # elmt.send_keys(Keys.ENTER)
  109. time.sleep(3)
  110. scrolling(driver, 10)
  111. time.sleep(60)
  112. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
  113. numresults = len(elmts)
  114. print('搜尋結果數量', numresults)
  115. time.sleep(20)
  116. driver.save_screenshot(r"C:\/Users\/s1301\/Downloads\/" +date +fname + ".png")
  117. if numresults == 0:
  118. send_msg('stop working...')
  119. sys.exit()
  120. datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': [], '結果說明': []}
  121. df = pd.DataFrame()
  122. idx = 1
  123. found = False
  124. test_lst = []
  125. txt_dict = {}
  126. for elmt in elmts:
  127. href = elmt.get_attribute('href')
  128. txt = elmt.text
  129. desc = None
  130. try:
  131. elmt2 = elmt.find_element(By.XPATH, "./../../..//div[@data-content-feature=1]")
  132. desc = elmt2.text
  133. except:
  134. desc = None
  135. # print(desc)
  136. table.insert(
  137. {'title': elmt.text, 'url': href, 'keyword': kw, 'dt': datetime.datetime.now(), 'ranking': idx,'description':fname})
  138. datadict['搜尋詞'].append(kw)
  139. datadict['結果標題'].append(txt)
  140. datadict['結果網址'].append(href)
  141. datadict['結果名次'].append(str(idx))
  142. datadict['結果說明'].append(desc)
  143. # if len(txt) > 10:
  144. # if domain is not None:
  145. # for d in domain:
  146. # if d in href:
  147. # print('found....')
  148. # print('clicked....')
  149. # print(href)
  150. # print(txt)
  151. # print("ranking", idx)
  152. # found = True
  153. # return
  154. # else:
  155. # ex = False
  156. # for ee in exclude:
  157. # if ee in href:
  158. # ex = True
  159. # if not ex:
  160. # test_lst.append(elmt)
  161. # txt_dict[elmt] = txt
  162. idx += 1
  163. # if exclude is not None:
  164. # print('exclude')
  165. # elmt = random.choice(test_lst[5:])
  166. # print(elmt)
  167. # print(txt_dict[elmt])
  168. #
  169. # webdriver.ActionChains(driver).move_to_element(elmt).perform()
  170. # # elmt.click()
  171. # webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  172. # time.sleep(5)
  173. #
  174. # if not found:
  175. # table.insert(
  176. # {'ranking': -1, 'kw': kw, 'results': numresults, 'url': '', 'title': '未收錄', 'descrption': desc})
  177. df['搜尋詞'] = datadict['搜尋詞']
  178. df['結果標題'] = datadict['結果標題']
  179. df['結果網址'] = datadict['結果網址']
  180. df['結果名次'] = datadict['結果名次']
  181. df['結果說明'] = datadict['結果說明']
  182. df.to_csv(r"C:\/Users\/s1301\/Downloads\/"+date+fname+".csv")
  183. except:
  184. print('exception')
  185. traceback.print_exc()
  186. # time.sleep(9999)
  187. # driver.save_screenshot('c:/tmp/seo/'+kw+".png")
  188. driver.quit()
  189. sys.exit()
  190. d = {'ny':"https://www.google.com/search?q=angelo+koo&num=100&sca_esv=a67a64356a29d00d&hl=en&gl=us&sxsrf=ADLYWIJM6j_9LFNgPC_A_jo1ZldZjt50mA:1732591817687&ei=yUBFZ4vRKfChvr0Pv9CqkQQ&start=1&sa=N&sstk=ATObxK74c-Vmj60wMyZAkg6KYknAUt3xCeh6PWZn_0VWHs666zLLIqx32MESjFiY6pUy0nmojNGr9rdIBk7GqZETOYTma4FcqskAWg&ved=2ahUKEwjL2dW-h_mJAxXwkK8BHT-oKkIQ8tMDegQICRAE&biw=1274&bih=689&dpr=2.24",
  191. 'wc':"https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICItV2FzaGluZ3RvbixEaXN0cmljdCBvZiBDb2x1bWJpYSxVbml0ZWQgU3RhdGVz&gws_rd=cr#gws_rd=cr&ip=1",
  192. 'miami':"https://www.google.com/search?q=angelo+koo&num=100&sca_esv=a67a64356a29d00d&hl=en&gl=us&sxsrf=ADLYWIKPS2ivSJI7PP3_HRmy4H7dkLevCA:1732591895799&ei=F0FFZ7m_MKjZ1e8P9raGmQg&start=1&sa=N&sstk=ATObxK5Pv_BLMA5qsBfP9ByNdQYL8X07y597zLV907rx32LLXesARM2JfmOiSHWxmUJ07LOGiijKZVDzFt_Bdou4pzivtTd0I34uyA&ved=2ahUKEwj5pvXjh_mJAxWobPUHHXabIYMQ8tMDegQICxAE&biw=1274&bih=689&dpr=2.24",
  193. 'chicago':"https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIeQ2hpY2FnbyxJbGxpbm9pcyxVbml0ZWQgU3RhdGVz&gws_rd=cr"}
  194. d_3 = {
  195. 'jp':'https://www.google.com/search?q=angelo+koo&hl=ja&gl=jp&num=100&uule=w+CAIQICIHTm9reW8sSmFwYW4',
  196. 'ru':'https://www.google.com/search?q=angelo+koo&hl=ru&gl=ru&num=100&uule=w+CAIQICIGTW9zY293',
  197. 'fr':'https://www.google.com/search?q=angelo+koo&hl=fr&gl=fr&num=100&uule=w+CAIQICIGUGFyaXM'}
  198. location = 'fr'
  199. run_once({'kw':'angelo koo','fname':location,'date':'0703','url':d_3[location]})
  200. ####手動截圖:須按右下角的設定選擇區域######
  201. ny="https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIWTmV3IFlvcmssVW5pdGVkIFN0YXRlcw&gws_rd=cr"
  202. wc="https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICItV2FzaGluZ3RvbixEaXN0cmljdCBvZiBDb2x1bWJpYSxVbml0ZWQgU3RhdGVz&gws_rd=cr#gws_rd=cr&ip=1"
  203. miami="https://www.google.com/search?q=angelo+koo&num=100&sca_esv=a67a64356a29d00d&hl=en&gl=us&sxsrf=ADLYWIKPS2ivSJI7PP3_HRmy4H7dkLevCA:1732591895799&ei=F0FFZ7m_MKjZ1e8P9raGmQg&start=1&sa=N&sstk=ATObxK5Pv_BLMA5qsBfP9ByNdQYL8X07y597zLV907rx32LLXesARM2JfmOiSHWxmUJ07LOGiijKZVDzFt_Bdou4pzivtTd0I34uyA&ved=2ahUKEwj5pvXjh_mJAxWobPUHHXabIYMQ8tMDegQICxAE&biw=1274&bih=689&dpr=2.24"
  204. chicago="https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIeQ2hpY2FnbyxJbGxpbm9pcyxVbml0ZWQgU3RhdGVz&gws_rd=cr"
  205. # d = {'ny':"https://www.google.com/search?q=angelo+koo&num=100&sca_esv=a67a64356a29d00d&hl=en&gl=us&sxsrf=ADLYWIJM6j_9LFNgPC_A_jo1ZldZjt50mA:1732591817687&ei=yUBFZ4vRKfChvr0Pv9CqkQQ&start=1&sa=N&sstk=ATObxK74c-Vmj60wMyZAkg6KYknAUt3xCeh6PWZn_0VWHs666zLLIqx32MESjFiY6pUy0nmojNGr9rdIBk7GqZETOYTma4FcqskAWg&ved=2ahUKEwjL2dW-h_mJAxXwkK8BHT-oKkIQ8tMDegQICRAE&biw=1274&bih=689&dpr=2.24",
  206. # 'wc':"https://www.google.com/search?q=angelo+koo&num=100&sca_esv=a67a64356a29d00d&hl=en&gl=us&sxsrf=ADLYWILuIc3D0f7Biau98sqFUUZogBqCDQ:1732591880729&ei=CEFFZ4OWLLno1e8Pz8DawQE&start=1&sa=N&sstk=ATObxK7tiLCo3LSdvk03JNDl8jRF2TRafES_b3y_Zdu-tOEeei8BkqsWFkySS3UhrMToLj9XaJoMfO9cbVlcbpzBmHQBcmpw4NTxYw&ved=2ahUKEwiDut3ch_mJAxU5dPUHHU-gNhgQ8tMDegQIDRAE&biw=1274&bih=689&dpr=2.24",
  207. # 'miami':"https://www.google.com/search?q=angelo+koo&num=100&sca_esv=a67a64356a29d00d&hl=en&gl=us&sxsrf=ADLYWIKPS2ivSJI7PP3_HRmy4H7dkLevCA:1732591895799&ei=F0FFZ7m_MKjZ1e8P9raGmQg&start=1&sa=N&sstk=ATObxK5Pv_BLMA5qsBfP9ByNdQYL8X07y597zLV907rx32LLXesARM2JfmOiSHWxmUJ07LOGiijKZVDzFt_Bdou4pzivtTd0I34uyA&ved=2ahUKEwj5pvXjh_mJAxWobPUHHXabIYMQ8tMDegQICxAE&biw=1274&bih=689&dpr=2.24",
  208. # 'chicago':"https://www.google.com/search?q=angelo+koo&num=100&sca_esv=a67a64356a29d00d&hl=en&gl=us&sxsrf=ADLYWILJICQpK9Ihu-LtB3GE2lKjcKpsjQ:1732591304207&ei=yD5FZ5iyDPbq1e8PuM3msAU&start=1&sa=N&sstk=ATObxK7FwISMQrob8SCRdfsBYNwoBYcZNm8OzwvSVkL1SgQphssf9htXgWrFb0IS39QEWx_4yuAOLwF4FszOrem0NnpL_n-dJrgjH5OB9rtGSRm7ykBoy-ju3sLbo_wUL_Kd&ved=2ahUKEwiYtunJhfmJAxV2dfUHHbimGVY4yAEQ8tMDegQIDBAJ&biw=1275&bih=689&dpr=2.24"}