_clickjob.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. import time
  2. from datetime import datetime
  3. import json
  4. from selenium import webdriver
  5. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  6. import time
  7. import os
  8. import urllib.parse
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import codecs
  13. import random
  14. import requests
  15. import dataset
  16. import traceback
  17. import sys
  18. from selenium.webdriver.common.keys import Keys
  19. add_tabs = [0,0,1,2,0,3,4,0,1,2,0,3,4,0,1,2,0,3,4,0]
  20. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  21. driver=None
  22. headers = {
  23. "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
  24. "Content-Type": "application/x-www-form-urlencoded"
  25. }
  26. sleepoffset = 0
  27. def send_msg(kw):
  28. params = {"message": "處理關鍵字: "+kw}
  29. r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
  30. def empty_query(q):
  31. global driver
  32. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  33. driver.get(googleurl)
  34. time.sleep(3)
  35. def process_query(domain, target_domain, brands, query):
  36. print(query)
  37. sleepoffset = 0
  38. global driver
  39. driver.get('https://www.google.com?num=100')
  40. time.sleep(3)
  41. print(driver.current_url)
  42. # elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
  43. # ABOVE METHOD IS DEPRECATED STARTING SELENIUM 4.3.0, USE THIS
  44. #
  45. elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  46. time.sleep(1)
  47. elmt.send_keys(query)
  48. elmt.send_keys(Keys.ENTER)
  49. idx=1
  50. ranking=-1
  51. domain_in_link = 0
  52. googleurl = driver.current_url
  53. print(driver.current_url)
  54. elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/a")
  55. print (len(elmts))
  56. # driver.save_screenshot('c:/tmp/test.png')
  57. for el in elmts:
  58. href=el.get_attribute('href')
  59. txt=el.text
  60. if len(txt)>10:
  61. if domain in href:
  62. domain_in_link += 1
  63. print('clicked....')
  64. print(href)
  65. print(txt)
  66. webdriver.ActionChains(driver).move_to_element(el).perform()
  67. webdriver.ActionChains(driver).move_to_element(el).click().perform()
  68. time.sleep(6)
  69. new_windows_count = add_tabs[random.randint(0,19)]
  70. print(str(new_windows_count) + " new tabs")
  71. for i in range (0,new_windows_count):
  72. original_window = driver.current_window_handle
  73. driver.switch_to.new_window('window')
  74. driver.get(href)
  75. sleepoffset += 4
  76. time.sleep(6)
  77. driver.close()
  78. driver.switch_to.window(original_window)
  79. if domain in target_domain:
  80. print("Target link found")
  81. time_stamp = datetime.fromtimestamp(time.time())
  82. time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M:%S")
  83. db['click_results'].insert({"time_stamp": time_stamp, "brand": brands[domain], "domain": domain, "query": query, "url": href, "content": txt, "extra_windows": new_windows_count})
  84. break
  85. if domain in target_domain:
  86. print("Target domain found")
  87. time_stamp = datetime.fromtimestamp(time.time())
  88. time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M:%S")
  89. db['query_results'].insert({"time_stamp": time_stamp, "brand": brands[domain], "domain": domain, "query": query, "googleurl": googleurl, "element_count": len(elmts), "domain_in_link_count": domain_in_link})
  90. print(domain_in_link)
  91. def run_once(domain, target_domain, brands, query):
  92. global driver
  93. result=[]
  94. options = webdriver.ChromeOptions()
  95. options.add_argument('--headless')
  96. # options.add_argument("--user-agent=" +user_agent)
  97. options.add_argument("--incognito")
  98. options.add_argument('--no-sandbox')
  99. options.add_argument('--disable-dev-shm-usage')
  100. driver = webdriver.Chrome(
  101. options=options)
  102. driver.delete_all_cookies()
  103. driver.set_window_size(1400,1000)
  104. process_query(domain, target_domain, brands, query)
  105. time.sleep(3)
  106. driver.quit()
  107. #execution starts here
  108. def execute(domain, target_domain, brands, query_list):
  109. while True:
  110. try:
  111. run_once(domain, target_domain, brands, random.choice(query_list))
  112. except:
  113. traceback.print_exc()
  114. sleepint=random.randint(75,110) - sleepoffset
  115. print("Completed (" + str(sleepint) + ")")
  116. time.sleep(sleepint)