_clickjob.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. import time
  2. from datetime import datetime as dt
  3. import json
  4. from selenium import webdriver
  5. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  6. import time
  7. import os
  8. import urllib.parse
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import codecs
  13. import random
  14. import requests
  15. import dataset
  16. import traceback
  17. import sys
  18. from selenium.webdriver.common.keys import Keys
  19. import timeit
  20. import socket
  # Module-level configuration shared by the functions below.
  #
  # SECURITY: the database password and the LINE bearer token used to be
  # available only as literals in this file. They can now be supplied through
  # the environment (SEO_DB_URL, LINE_NOTIFY_TOKEN). The literal fallbacks are
  # kept solely so existing deployments keep working unchanged; they should be
  # rotated and removed from source control.
  db = dataset.connect(os.environ.get(
      'SEO_DB_URL',
      'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'))

  # Selenium WebDriver instance; assigned by run_once() and read by the
  # query helpers through this module-level name.
  driver = None

  # HTTP headers sent with every notification request in send_msg().
  headers = {
      "Authorization": "Bearer " + os.environ.get(
          "LINE_NOTIFY_TOKEN", "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2"),
      "Content-Type": "application/x-www-form-urlencoded",
  }

  # Not read by any function visible in this file.
  sleepoffset = 0
  def send_msg(kw):
      """Post a notification about the keyword ``kw`` to the LINE Notify API.

      The message text is the fixed Chinese prefix "處理關鍵字: "
      ("processing keyword: ") followed by ``kw``. The request uses the
      module-level ``headers`` for authorization.

      Args:
          kw: Keyword string appended to the message.

      Raises:
          requests.RequestException: On connection failure or when the
              request exceeds the timeout. HTTP error statuses are not
              raised, matching the previous behaviour.
      """
      # NOTE(review): LINE Notify was announced as discontinued (end of
      # March 2025) - confirm this endpoint is still usable.
      params = {"message": "處理關鍵字: " + kw}
      # A timeout keeps a stalled connection from blocking the caller forever;
      # without one, requests waits indefinitely.
      requests.post(
          "https://notify-api.line.me/api/notify",
          headers=headers,
          params=params,
          timeout=10,
      )
  def empty_query(q):
      """Load the Google results page for ``q`` in the shared browser.

      The query is percent-encoded, the resulting search URL is opened with
      the module-level ``driver``, and the function then waits three seconds.
      Nothing is read from the page and nothing is returned.

      Args:
          q: Search text.
      """
      encoded_query = urllib.parse.quote(q)
      search_url = 'https://www.google.com/search?q=' + encoded_query
      # ``driver`` is only read here, so the module-level name is used directly.
      driver.get(search_url)
      time.sleep(3)
  def process_query(urllist, query, client):
      """Search Google for ``query`` and click one result matching ``urllist``.

      Using the module-level ``driver``, this loads the Google home page, types
      ``query`` into the search box and submits it, collects the result anchors
      (``//div[@class='yuRUbf']/a``), picks at random one whose ``href``
      contains any entry of ``urllist``, clicks it, inserts a row into the
      ``sns_log`` table and finally sleeps for 40-60 seconds.

      NOTE(review): automated searching and clicking of this kind is likely
      prohibited by the search provider's terms of service - confirm before
      deploying.

      Args:
          urllist: Iterable of URL substrings to look for in result links.
              NOTE(review): a bare string would be iterated character by
              character here, so callers should pass a list - confirm.
          query: Text typed into the search box; also stored as ``kw``.
          client: Value stored in the ``client`` column of the log row.

      Returns:
          444 if the URL after submitting the search contains "sorry"
          (presumably the provider's block page), 200 if a matching result
          was clicked and logged, 0 if no result matched.
      """
      driver.get('https://www.google.com?num=100')
      time.sleep(3)
      print(driver.current_url)

      # Try the <textarea> search box first and fall back to the <input> one.
      # ``except Exception`` (not a bare ``except``) so Ctrl+C still interrupts.
      try:
          elmt = driver.find_element(By.XPATH, "//textarea[@name='q']")
      except Exception:
          elmt = driver.find_element(By.XPATH, "//input[@name='q']")
      time.sleep(1)
      elmt.send_keys(query)
      elmt.send_keys(Keys.ENTER)

      googleurl = driver.current_url
      print(driver.current_url)
      if "sorry" in googleurl:
          return 444

      # Best effort: follow the link inside <p id="ofr"> when it is present
      # (original comment: "in case there are duplicates"). Its absence is the
      # normal case, so failures are deliberately ignored.
      try:
          clickmore = driver.find_element("xpath", "//p[@id='ofr']").find_element(By.TAG_NAME, "a")
          webdriver.ActionChains(driver).move_to_element(clickmore).perform()
          webdriver.ActionChains(driver).move_to_element(clickmore).click().perform()
      except Exception:
          pass

      elmts = driver.find_elements("xpath", "//div[@class='yuRUbf']/a")
      print(len(elmts))

      # Pair every matching anchor with its 1-based position on the page.
      # ``any`` adds each anchor at most once even when several entries of
      # ``urllist`` match, so the random pick below is not skewed; ``or ''``
      # guards against anchors whose href attribute is missing.
      candidates = []
      for rank, el in enumerate(elmts, start=1):
          href = el.get_attribute('href') or ''
          if any(url in href for url in urllist):
              candidates.append((el, rank))

      if not candidates:
          return 0  # if no articles found

      el, rank = random.choice(candidates)
      domain_in_link = 1
      print('clicked....')
      href = el.get_attribute('href')
      # Read the title BEFORE clicking: once the click navigates away the
      # element reference can go stale and ``el.text`` would raise, losing
      # the log row.
      title = el.text
      print(href)
      print(title)
      webdriver.ActionChains(driver).move_to_element(el).perform()
      webdriver.ActionChains(driver).move_to_element(el).click().perform()
      db['sns_log'].insert({
          "kw": query,
          "ranking": rank,
          "url": href,
          "dt": dt.now(),
          "client": client,
          "title": title,
          "results": len(elmts),
      })
      # Stay on the clicked page for a random 40-60 seconds.
      time.sleep(random.randint(40, 60))
      print(domain_in_link)
      return 200
  def run_once(url, query, client):
      """Start a fresh headless Chrome, run one query in it, and shut it down.

      A new WebDriver is created with the headless, incognito, no-sandbox and
      disable-dev-shm-usage flags and stored in the module-level ``driver`` so
      that ``process_query`` can use it.

      Args:
          url: Passed to ``process_query`` as its ``urllist`` argument.
          query: Search text passed through to ``process_query``.
          client: Client identifier passed through to ``process_query``.

      Returns:
          The status code returned by ``process_query``.

      Raises:
          Any exception raised while setting up the browser or by
          ``process_query``; the browser is still closed in that case.
      """
      global driver
      options = webdriver.ChromeOptions()
      for flag in ('--headless', "--incognito", '--no-sandbox',
                   '--disable-dev-shm-usage'):
          options.add_argument(flag)
      driver = webdriver.Chrome(options=options)
      # ``finally`` guarantees the Chrome process is terminated even when the
      # query raises; previously a failure skipped ``quit()`` and leaked a
      # browser process on every failed run.
      try:
          driver.delete_all_cookies()
          driver.set_window_size(1400, 1000)
          return process_query(url, query, client)
      finally:
          driver.quit()
  117. #execution starts here
  def execute(url, query, client):
      """Run one query via ``run_once``, report timing, then pause.

      Errors raised by ``run_once`` are printed with a traceback and do not
      propagate; the status code then stays at 0. After the run the function
      sleeps for 10 seconds plus whatever is needed to bring the run time up
      to 50 seconds.

      Args:
          url: Passed through to ``run_once``.
          query: Search text passed through to ``run_once``.
          client: Client identifier passed through to ``run_once``.

      Returns:
          The status code from ``run_once``, or 0 if it raised.
      """
      print("Ctrl+C or Ctrl+Z to stop.")
      statuscode = 0
      st = timeit.default_timer()
      try:
          statuscode = run_once(url, query, client)
      except Exception:
          # ``except Exception`` rather than a bare ``except``: the message
          # above invites the user to press Ctrl+C, and a bare ``except``
          # would swallow that KeyboardInterrupt and carry on.
          traceback.print_exc()
      timetaken = timeit.default_timer() - st
      print("Time taken: " + str(timetaken))
      print("Process returned with " + str(statuscode))
      if statuscode == 444:
          print("You have been caught!!!")
          # A notification call used to be sketched here; it is disabled.

      # Pad short runs so the run time plus padding is at least 50 seconds.
      extrasleep = max(0, 50 - timetaken)
      print("Ctrl+C or Ctrl+Z to stop now.")
      print("You have " + str(10 + extrasleep) + " seconds.")
      time.sleep(10 + extrasleep)
      return statuscode