# ranking_world.py (8.5 KB)
  1. # import redis
  2. import time
  3. import traceback
  4. # import json
  5. from selenium import webdriver
  6. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  7. import time
  8. import os
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import dataset
  13. from selenium.webdriver.common.keys import Keys
  14. import json
  15. import random
  16. import time
  17. import redis
  18. import sys
  19. import codecs
  20. import pandas as pd
  21. import random
  22. import os
  23. import time
  24. import datetime
  25. from selenium.webdriver.chrome.service import Service
  26. import dataset
  27. import pymysql
  28. pymysql.install_as_MySQLdb()
  29. from userAgentRandomizer import userAgents
  30. import requests
  31. driver = None
  32. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  33. headers = {
  34. "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
  35. "Content-Type": "application/x-www-form-urlencoded"
  36. }
  37. def send_msg(kw):
  38. params = {"message": kw}
  39. r = requests.post("https://notify-api.line.me/api/notify", headers=headers, params=params)
  40. def re_get_webdriver():
  41. global port
  42. global driver
  43. result = []
  44. if driver is not None:
  45. print('closing....')
  46. driver.quit()
  47. os.system('killall chrome')
  48. print('quit....')
  49. driver = None
  50. try:
  51. ua = userAgents()
  52. user_agent = ua.random()
  53. options = webdriver.ChromeOptions()
  54. options.add_argument("--no-sandbox")
  55. options.add_argument("--disable-dev-shm-usage")
  56. # options.add_argument("--headless")
  57. print(user_agent)
  58. options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})
  59. options.add_argument("--incognito")
  60. driver = None
  61. try:
  62. if os.name == 'nt':
  63. driver = webdriver.Chrome(options=options)
  64. else:
  65. driver = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Downloads\chromedriver_108\chromedriver', options=options)
  66. except:
  67. traceback.print_exc()
  68. return
  69. driver.delete_all_cookies()
  70. driver.set_window_size(950, 20000)
  71. return
  72. except:
  73. traceback.print_exc()
  74. driver = None
  75. return None
  76. def scrolling(driver, pgnum):
  77. ub = driver.find_element("css selector",'body')
  78. for i in range(pgnum):
  79. ub.send_keys(Keys.PAGE_DOWN)
  80. if pgnum > 1:
  81. time.sleep(0.3)
  82. def run_once(jsobj):
  83. table = db['google_rank']
  84. date = jsobj['date']
  85. print(jsobj)
  86. global driver
  87. i = 100
  88. if driver is None:
  89. time.sleep(8)
  90. re_get_webdriver()
  91. if driver is None:
  92. return
  93. try:
  94. kw = jsobj['kw']
  95. fname = jsobj['fname']
  96. url = jsobj['url']
  97. # if jsobj.get('domain') is None:
  98. # exclude = jsobj['exclude']
  99. # domain = None
  100. # else:
  101. # domain = jsobj['domain']
  102. # exclude = None
  103. city_map = {'chicago': ['42.04866173771369', '-87.68260072643513'],
  104. 'miami': ['25.764458843530548', '-80.19787522585152'],
  105. 'wc': ['38.96071674051165', '-77.03155367248387'],
  106. 'ny': ['40.76774141099703', '-73.98439238945637']}
  107. city = jsobj['fname']
  108. print(city)
  109. Map_coordinates = dict({
  110. "latitude": float(city_map[f"{city}"][0]),
  111. "longitude": float(city_map[f"{city}"][1]),
  112. "accuracy": 100
  113. })
  114. # 芝加哥、邁阿密、紐約、華盛頓
  115. # driver.execute_cdp_cmd("Emulation.setGeolocationOverride", Map_coordinates)
  116. # driver.get('https://www.google.com?num=100&lr=lang_en')
  117. driver.get(url)
  118. # print(driver.current_url)
  119. # elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  120. # time.sleep(1)
  121. # elmt.send_keys(kw)
  122. #
  123. # elmt.send_keys(Keys.ENTER)
  124. time.sleep(3)
  125. scrolling(driver, 10)
  126. time.sleep(20)
  127. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
  128. numresults = len(elmts)
  129. print('搜尋結果數量', numresults)
  130. time.sleep(20)
  131. # driver.save_screenshot('C:\/Users\/s1301\/Pictures\/Saved Pictures\/angelo koo\/' +date +fname + ".png")
  132. if numresults == 0:
  133. send_msg('stop working...')
  134. sys.exit()
  135. datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': [], '結果說明': []}
  136. df = pd.DataFrame()
  137. idx = 1
  138. found = False
  139. test_lst = []
  140. txt_dict = {}
  141. for elmt in elmts:
  142. href = elmt.get_attribute('href')
  143. txt = elmt.text
  144. desc = None
  145. try:
  146. elmt2 = elmt.find_element(By.XPATH, "./../../..//div[@data-content-feature=1]")
  147. desc = elmt2.text
  148. except:
  149. desc = None
  150. # print(desc)
  151. table.insert(
  152. {'title': elmt.text, 'url': href, 'keyword': kw, 'dt': datetime.datetime.now(), 'ranking': idx,'description':fname})
  153. datadict['搜尋詞'].append(kw)
  154. datadict['結果標題'].append(txt)
  155. datadict['結果網址'].append(href)
  156. datadict['結果名次'].append(str(idx))
  157. datadict['結果說明'].append(desc)
  158. # if len(txt) > 10:
  159. # if domain is not None:
  160. # for d in domain:
  161. # if d in href:
  162. # print('found....')
  163. # print('clicked....')
  164. # print(href)
  165. # print(txt)
  166. # print("ranking", idx)
  167. # found = True
  168. # return
  169. # else:
  170. # ex = False
  171. # for ee in exclude:
  172. # if ee in href:
  173. # ex = True
  174. # if not ex:
  175. # test_lst.append(elmt)
  176. # txt_dict[elmt] = txt
  177. idx += 1
  178. # if exclude is not None:
  179. # print('exclude')
  180. # elmt = random.choice(test_lst[5:])
  181. # print(elmt)
  182. # print(txt_dict[elmt])
  183. #
  184. # webdriver.ActionChains(driver).move_to_element(elmt).perform()
  185. # # elmt.click()
  186. # webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  187. # time.sleep(5)
  188. #
  189. # if not found:
  190. # table.insert(
  191. # {'ranking': -1, 'kw': kw, 'results': numresults, 'url': '', 'title': '未收錄', 'descrption': desc})
  192. df['搜尋詞'] = datadict['搜尋詞']
  193. df['結果標題'] = datadict['結果標題']
  194. df['結果網址'] = datadict['結果網址']
  195. df['結果名次'] = datadict['結果名次']
  196. df['結果說明'] = datadict['結果說明']
  197. df.to_excel('C:\/Users\/s1301\/Pictures\/Saved Pictures\/angelo koo\/'+date+fname+".xls")
  198. except:
  199. print('exception')
  200. traceback.print_exc()
  201. # time.sleep(9999)
  202. # driver.save_screenshot('c:/tmp/seo/'+kw+".png")
  203. driver.quit()
  204. sys.exit()
  205. d = {'ny':"https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIWTmV3IFlvcmssVW5pdGVkIFN0YXRlcw&gws_rd=cr",
  206. 'wc':"https://www.google.com/search?q=angelo%20koo&hl=en&gl=us&num=100&uule=w+CAIQICItV2FzaGluZ3RvbixEaXN0cmljdCBvZiBDb2x1bWJpYSxVbml0ZWQgU3RhdGVz&gws_rd=cr#gws_rd=cr&ip=1",
  207. 'miami':"https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIbTWlhbWksRmxvcmlkYSxVbml0ZWQgU3RhdGVz&gws_rd=cr",
  208. 'chicago':"https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIeQ2hpY2FnbyxJbGxpbm9pcyxVbml0ZWQgU3RhdGVz&gws_rd=cr"}
  209. location = 'chicago'
  210. run_once({'kw':'angelo koo','fname':location,'date':'0216','url':d[location]})
  211. ####手動截圖:須按右下角的設定選擇區域######
  212. ny="https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIWTmV3IFlvcmssVW5pdGVkIFN0YXRlcw&gws_rd=cr"
  213. wc="https://www.google.com/search?q=angelo%20koo&hl=en&gl=us&num=100&uule=w+CAIQICItV2FzaGluZ3RvbixEaXN0cmljdCBvZiBDb2x1bWJpYSxVbml0ZWQgU3RhdGVz&gws_rd=cr#gws_rd=cr&ip=1"
  214. miami="https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIbTWlhbWksRmxvcmlkYSxVbml0ZWQgU3RhdGVz&gws_rd=cr"
  215. chicago="https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIeQ2hpY2FnbyxJbGxpbm9pcyxVbml0ZWQgU3RhdGVz&gws_rd=cr"