_clickjob.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. import time
  2. from datetime import datetime as dt
  3. import json
  4. from selenium import webdriver
  5. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  6. import time
  7. import os
  8. import urllib.parse
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import codecs
  13. import random
  14. import requests
  15. import dataset
  16. import traceback
  17. import sys
  18. from selenium.webdriver.common.keys import Keys
  19. import timeit
  20. import socket
  21. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  22. driver=None
  23. headers = {
  24. "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
  25. "Content-Type": "application/x-www-form-urlencoded"
  26. }
  27. sleepoffset = 0
  28. def send_msg(kw):
  29. params = {"message": "處理關鍵字: "+kw}
  30. r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
  31. def empty_query(q):
  32. global driver
  33. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  34. driver.get(googleurl)
  35. time.sleep(3)
  36. def process_query(urllist, query, client):
  37. sleepoffset = 0
  38. global driver
  39. driver.get('https://www.google.com?num=100')
  40. time.sleep(3)
  41. print(driver.current_url)
  42. # elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")a4bIc
  43. # ABOVE METHOD IS DEPRECATED STARTING SELENIUM 4.3.0, USE THIS
  44. #
  45. #elmt = driver.find_element(By.XPATH, "//input[@name='q']")
  46. elmt = driver.find_element(By.XPATH, "//textarea[@name='q']")
  47. time.sleep(1)
  48. elmt.send_keys(query)
  49. elmt.send_keys(Keys.ENTER)
  50. idx=1
  51. ranking=-1
  52. domain_in_link = 0
  53. googleurl = driver.current_url
  54. print(driver.current_url)
  55. if "sorry" in googleurl:
  56. return 444
  57. try: #in case there are duplicates...
  58. clickmore=driver.find_element("xpath","//p[@id='ofr']").find_element(By.TAG_NAME, "a")
  59. webdriver.ActionChains(driver).move_to_element(clickmore).perform()
  60. webdriver.ActionChains(driver).move_to_element(clickmore).click().perform()
  61. except:
  62. pass
  63. elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/a")
  64. print (len(elmts))
  65. # driver.save_screenshot('c:/tmp/test.png')
  66. n=0
  67. clickcand=[]
  68. for el in elmts:
  69. n+=1
  70. #txt=el.text
  71. txt='aaaaaaaaaaa'
  72. href=el.get_attribute('href')
  73. if len(txt)>10:
  74. for url in urllist:
  75. if url in href:
  76. clickcand.append([el,n])
  77. if len(clickcand)!=0:
  78. '''for e in clickcand:
  79. href = e[0].get_attribute('href')
  80. print(href)
  81. print(e[0].text)
  82. print("Rank: " + str(e[1]))
  83. db['sns_log'].insert({"kw": query, "ranking": e[1], "url": href, "dt": dt.now(), "client": client, "title": e[0].text, "results": n})'''
  84. e = random.choice(clickcand)
  85. el = e[0]
  86. domain_in_link += 1
  87. print('clicked....')
  88. href = el.get_attribute('href')
  89. print(href)
  90. print(el.text)
  91. webdriver.ActionChains(driver).move_to_element(el).perform()
  92. webdriver.ActionChains(driver).move_to_element(el).click().perform()
  93. db['sns_log'].insert({"kw": query, "ranking": e[1], "url": href, "dt": dt.now(), "client": client, "title": el.text, "results": n})
  94. duration = random.randint(40,60)
  95. time.sleep(duration)
  96. print(domain_in_link)
  97. return 200
  98. return 0 # if no articles found
  99. def run_once(url, query, client):
  100. global driver
  101. result=[]
  102. options = webdriver.ChromeOptions()
  103. options.add_argument('--headless')
  104. # options.add_argument("--user-agent=" +user_agent)
  105. options.add_argument("--incognito")
  106. options.add_argument('--no-sandbox')
  107. options.add_argument('--disable-dev-shm-usage')
  108. driver = webdriver.Chrome(
  109. options=options)
  110. driver.delete_all_cookies()
  111. driver.set_window_size(1400,1000)
  112. statuscode = process_query(url, query, client)
  113. driver.quit()
  114. return statuscode
  115. #execution starts here
  116. def execute(url, query, client):
  117. print("Ctrl+C or Ctrl+Z to stop.")
  118. statuscode = 0
  119. st = timeit.default_timer()
  120. try:
  121. statuscode = run_once(url, query, client)
  122. except:
  123. traceback.print_exc()
  124. timetaken = timeit.default_timer()-st
  125. print("Time taken: " + str(timetaken))
  126. print("Process returned with " + str(statuscode))
  127. if statuscode == 444:
  128. print("You have been caught!!!")
  129. #notify("Clickbot " + brands[domain] + " has been caught by Google and will terminate. IP: ")
  130. extrasleep = 0
  131. if(timetaken < 50):
  132. extrasleep = 50 - timetaken
  133. print("Ctrl+C or Ctrl+Z to stop now.")
  134. print("You have " + str(10 + extrasleep) + " seconds.")
  135. time.sleep(10 + extrasleep)
  136. return statuscode