# seo_hhh.py (2.6 KB)
  1. import time
  2. import json
  3. from selenium import webdriver
  4. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  5. import time
  6. import os
  7. import urllib.parse
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.common.by import By
  10. from selenium.webdriver.support import expected_conditions as EC
  11. import codecs
  12. import random
  13. import dataset
  14. import time
  15. import traceback
  16. import sys
  # Module-wide Selenium WebDriver handle. It starts out unset; run_once()
  # assigns the browser instance to it.
  driver = None

  # Request headers carrying a bearer token and a form-encoded content type.
  # NOTE(review): the token is hard-coded in source -- confirm whether it
  # should be loaded from configuration instead.
  headers = {
      "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
      "Content-Type": "application/x-www-form-urlencoded",
  }
  def empty_query(q):
      """Open the Google results page for *q* and pause briefly.

      The query text is percent-encoded and appended to the search URL, the
      module-level ``driver`` loads that URL, and the call then blocks for
      three seconds. Nothing is returned.
      """
      encoded_query = urllib.parse.quote(q)
      search_url = 'https://www.google.com/search?q=' + encoded_query
      driver.get(search_url)
      time.sleep(3)
  def process_query(qs):
      """Search Google for a keyword and click the first result on a domain.

      Args:
          qs: A two-item sequence ``(query, domain)``. ``query`` is the search
              text; ``domain`` is the substring looked for in each result link.

      The search is loaded through the module-level ``driver`` asking for 100
      results with ``hl=zh-TW``, followed by a six-second pause. Result anchors
      are then scanned in page order: the first one whose visible text is
      longer than 10 characters and whose ``href`` contains ``domain`` is
      hovered and clicked, and the scan stops. Returns ``None`` whether or not
      a match was found.
      """
      q, domain = qs[0], qs[1]
      googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
          urllib.parse.quote(q), 100, 'zh-TW')
      print(googleurl)
      driver.get(googleurl)
      time.sleep(6)

      # find_elements(By.XPATH, ...) works on Selenium 3 and 4, whereas the
      # find_elements_by_xpath() shortcut was removed in Selenium 4.3.
      elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
      print(len(elmts))
      for elmt in elmts:
          href = elmt.get_attribute('href')
          txt = elmt.text
          # get_attribute() yields None when the attribute is missing; such an
          # anchor cannot match and would make the `in` test raise TypeError.
          if href is None or len(txt) <= 10:
              continue
          if domain in href:
              print('clicked....')
              print(href)
              print(txt)
              # Hover over the link first, then move-and-click as a second
              # action chain.
              webdriver.ActionChains(driver).move_to_element(elmt).perform()
              webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
              break
  def run_once(q):
      """Run one search-and-click pass in a fresh headless Chrome session.

      Args:
          q: The ``(query, domain)`` pair passed straight to ``process_query``.

      A new headless Chrome instance is started and stored in the module-level
      ``driver``; its cookies are cleared and the window is sized to 1400x1000.
      After ``process_query`` returns, the function waits three seconds.

      The browser is shut down in a ``finally`` clause, so a failing query no
      longer leaves a Chrome process behind. Exceptions still propagate to the
      caller.
      """
      global driver
      print('run_once()')

      options = webdriver.ChromeOptions()
      options.add_argument('--headless')
      options.add_argument('--no-sandbox')
      options.add_argument("--disable-gpu")
      options.add_argument('--disable-dev-shm-usage')

      driver = webdriver.Chrome(options=options)
      try:
          driver.delete_all_cookies()
          driver.set_window_size(1400, 1000)
          print(q)
          process_query(q)
          time.sleep(3)
      finally:
          # Always release the browser, including when process_query() raises.
          driver.quit()
  # Load the click jobs once at startup. Each row exposes `term` (the `kw`
  # column under an alias) and `domain`.
  # NOTE(review): the database credentials are embedded in the connection URL;
  # confirm whether they should come from configuration instead.
  db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  cursor = db.query('SELECT kw as term,domain FROM seo.seo_clickjobs where category="hhh-faq" order by rand()')
  lst = list(cursor)

  # Endless loop: pick a random job, run it, then pause for a random 290-420
  # seconds before the next attempt.
  while True:
      try:
          c = random.choice(lst)
          run_once((c['term'], c['domain']))
      except Exception:
          # Catch Exception, not everything: a bare `except:` would also
          # swallow KeyboardInterrupt/SystemExit and make the script hard to
          # stop. Failures are reported and the loop carries on.
          traceback.print_exc()
      sleepint = random.randint(290, 420)
      time.sleep(sleepint)