hhh_clickjob.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. #import redis
  2. import time
  3. #import json
  4. from selenium import webdriver
  5. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  6. import time
  7. import os
  8. import urllib.parse
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import dataset
  13. import codecs
  14. import random
  15. import requests
  16. import time
  17. import sys
  18. import docker
  19. import codecs
  20. import random
  21. import os
  22. import time
  23. driver=None
  24. headers = {
  25. "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
  26. "Content-Type": "application/x-www-form-urlencoded"
  27. }
  28. def send_msg(kw):
  29. params = {"message": "處理關鍵字: "+kw}
  30. r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
  31. def empty_query(q):
  32. global driver
  33. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  34. driver.get(googleurl)
  35. time.sleep(3)
  36. def process_query(qs):
  37. print('processing...')
  38. print(qs)
  39. q=qs[0]
  40. domain=qs[2]
  41. cnt=qs[1]
  42. global driver
  43. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  44. driver.get(googleurl)
  45. time.sleep(3)
  46. if cnt > 0:
  47. for i in range(cnt):
  48. elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
  49. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  50. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  51. time.sleep(2)
  52. elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
  53. idx=1
  54. ranking=-1
  55. for elmt in elmts:
  56. href=elmt.get_attribute('href')
  57. txt=elmt.text
  58. if len(txt)>10:
  59. # if 'hhh.com.tw' in href:
  60. # if 'hhh.com.tw' in href:
  61. # if 'ai.choozmo.com' in href:
  62. if domain in href:
  63. # if 'searchome.net' in href:
  64. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  65. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  66. break
  67. def re_get_webdriver():
  68. global driver
  69. result=[]
  70. client = docker.from_env()
  71. ls=client.containers.list()
  72. print(ls)
  73. for l in ls:
  74. print(l.name)
  75. if 'p4444' in l.name:
  76. l.restart()
  77. print('restarted')
  78. time.sleep(6)
  79. else:
  80. print('name not correct')
  81. # options = webdriver.EdgeOptions()
  82. try:
  83. print('trying 4444....')
  84. options = webdriver.ChromeOptions()
  85. driver = webdriver.Remote(
  86. command_executor='http://127.0.0.1:4444/wd/hub',desired_capabilities=options.to_capabilities())
  87. print('4444 done')
  88. driver.set_window_size(1400,1000)
  89. print('driver is fine')
  90. return
  91. except:
  92. print('driver except')
  93. driver=None
  94. return None
  95. def run_once(url):
  96. global driver
  97. i=random.randint(0,20)
  98. if i<=3 or driver is None:
  99. # if True:
  100. re_get_webdriver()
  101. if driver is None:
  102. print('driver is none')
  103. return
  104. try:
  105. process_query(url)
  106. except:
  107. print('process_query exception')
  108. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  109. cursor=db.query('select category from cur_category')
  110. category='hhh-faq'
  111. for c in cursor:
  112. category=c['category']
  113. break
  114. cursor=db.query('select kw,page,domain from seo_clickjobs where category="'+category+'" or category="hhh-vip" order by rand()')
  115. lst=[]
  116. for c in cursor:
  117. lst.append((c['kw'],c['page'],c['domain']))
  118. while True:
  119. l=random.choice(lst)
  120. run_once( l )
  121. time.sleep(0.001)