db_clickjob.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. import redis
  2. import time
  3. import json
  4. from selenium import webdriver
  5. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  6. import time
  7. import os
  8. import urllib.parse
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import codecs
  13. import random
  14. from bs4 import BeautifulSoup
  15. import requests
  16. import dataset
  17. import time
  18. import rpyc
  19. import sys
  20. import docker
  21. driver=None
  22. headers = {
  23. "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
  24. "Content-Type": "application/x-www-form-urlencoded"
  25. }
  26. def send_msg(kw):
  27. params = {"message": "處理關鍵字: "+kw}
  28. r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
  29. def empty_query(q):
  30. global driver
  31. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  32. driver.get(googleurl)
  33. time.sleep(3)
  34. def process_query(qs):
  35. q=qs[0]
  36. domain=qs[2]
  37. cnt=qs[1]
  38. global driver
  39. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  40. driver.get(googleurl)
  41. time.sleep(3)
  42. if cnt > 0:
  43. for i in range(cnt):
  44. elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
  45. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  46. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  47. time.sleep(3)
  48. elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
  49. idx=1
  50. ranking=-1
  51. for elmt in elmts:
  52. href=elmt.get_attribute('href')
  53. txt=elmt.text
  54. if len(txt)>10:
  55. # if 'hhh.com.tw' in href:
  56. # if 'hhh.com.tw' in href:
  57. # if 'ai.choozmo.com' in href:
  58. if domain in href:
  59. # if 'searchome.net' in href:
  60. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  61. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  62. break
  63. def run_once(q):
  64. global driver
  65. result=[]
  66. # client = docker.from_env()
  67. # ls=client.containers.list()
  68. # print(ls)
  69. # ls[0].restart()
  70. time.sleep(10)
  71. options = webdriver.ChromeOptions()
  72. driver = webdriver.Chrome(
  73. desired_capabilities=options.to_capabilities())
  74. # driver = webdriver.Remote(
  75. # command_executor='http://127.0.0.1:4444/wd/hub',
  76. # desired_capabilities=options.to_capabilities())
  77. driver.set_window_size(1400,1000)
  78. print(q)
  79. print(q[0])
  80. process_query(q)
  81. # send_msg(q[0])
  82. # empty_query(q)
  83. intsleep=random.randint(5,12)
  84. time.sleep(intsleep)
  85. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  86. cursor=db.query('select category from cur_category')
  87. category='hhh-faq'
  88. for c in cursor:
  89. category=c['category']
  90. break
  91. cursor=db.query('select kw,page,domain from seo_clickjobs where category="'+category+'" order by rand()')
  92. for c in cursor:
  93. run_once( (c['kw'],c['page'],c['domain']) )
  94. time.sleep(0.001)