"""Search Google for a keyword, click one result on a target site, log it.

``execute`` is the entry point: it launches a fresh headless Chrome session,
runs a single search through ``process_query``, and then pauses before
returning a numeric status code.
"""
import codecs
import json
import os
import random
import socket
import sys
import time
import timeit
import traceback
import urllib.parse
from datetime import datetime as dt

import dataset
import requests
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    WebDriverException,
)
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# NOTE(review): the database password and the LINE Notify bearer token below
# are committed in plain text. They are left untouched so existing deployments
# keep working, but they should be rotated and loaded from the environment or
# a secrets store.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')

# Shared WebDriver instance; (re)created by run_once() for every run.
driver = None

headers = {
    "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
    "Content-Type": "application/x-www-form-urlencoded"
}

sleepoffset = 0


def send_msg(kw):
    """Post a LINE Notify message announcing that keyword *kw* is being handled."""
    params = {"message": "處理關鍵字: "+kw}
    requests.post("https://notify-api.line.me/api/notify", headers=headers, params=params)


def empty_query(q):
    """Load the Google results page for *q* in the shared driver and wait 3 s."""
    googleurl = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
    driver.get(googleurl)
    time.sleep(3)


def process_query(urllist, query, client):
    """Search Google for *query* and click one result whose link matches *urllist*.

    Args:
        urllist: URL substrings to look for in each result's ``href``. A single
            string is treated as a one-element list.
        query: Search text typed into the Google search box.
        client: Value stored in the ``client`` column of the ``sns_log`` row.

    Returns:
        444 if the URL after submitting the search contains ``"sorry"``,
        200 if a matching result was clicked and logged,
        0 if no result link matched.
    """
    # A bare string would otherwise be iterated character by character and
    # match almost every result link.
    if isinstance(urllist, str):
        urllist = [urllist]

    driver.get('https://www.google.com?num=100')
    time.sleep(3)
    print(driver.current_url)

    # The search box is looked up as a <textarea> first, then as an <input>.
    try:
        elmt = driver.find_element(By.XPATH, "//textarea[@name='q']")
    except NoSuchElementException:
        elmt = driver.find_element(By.XPATH, "//input[@name='q']")
    time.sleep(1)
    elmt.send_keys(query)
    elmt.send_keys(Keys.ENTER)

    googleurl = driver.current_url
    print(googleurl)
    if "sorry" in googleurl:
        return 444

    # Best effort: follow the link inside <p id="ofr"> when present (in case
    # there are duplicates). Any Selenium failure here is ignored on purpose.
    try:
        clickmore = driver.find_element(By.XPATH, "//p[@id='ofr']").find_element(By.TAG_NAME, "a")
        webdriver.ActionChains(driver).move_to_element(clickmore).perform()
        webdriver.ActionChains(driver).move_to_element(clickmore).click().perform()
    except WebDriverException:
        pass

    elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
    total = len(elmts)
    print(total)

    # Each candidate is [element, 1-based position]. A link is appended once
    # per matching entry of urllist, exactly as before.
    clickcand = []
    for position, el in enumerate(elmts, start=1):
        href = el.get_attribute('href')
        if not href:
            # An anchor without an href cannot match and would break `in`.
            continue
        for url in urllist:
            if url in href:
                clickcand.append([el, position])

    if not clickcand:
        return 0  # if no articles found

    el, rank = random.choice(clickcand)
    domain_in_link = 1
    print('clicked....')
    # Read everything needed for the log row BEFORE clicking: the click starts
    # a navigation, after which the element reference may be stale.
    href = el.get_attribute('href')
    title = el.text
    print(href)
    print(title)
    webdriver.ActionChains(driver).move_to_element(el).perform()
    webdriver.ActionChains(driver).move_to_element(el).click().perform()
    db['sns_log'].insert({"kw": query, "ranking": rank, "url": href, "dt": dt.now(),
                          "client": client, "title": title, "results": total})
    # Stay on the clicked page for a random 40-60 seconds.
    time.sleep(random.randint(40, 60))
    print(domain_in_link)
    return 200


def run_once(url, query, client):
    """Start a headless incognito Chrome, run one search, and always quit it.

    Args:
        url: Passed to ``process_query`` as its ``urllist`` argument.
        query: Search text.
        client: Client value recorded in the log row.

    Returns:
        The status code returned by ``process_query``.
    """
    global driver
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    # options.add_argument("--user-agent=" +user_agent)
    options.add_argument("--incognito")
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome(options=options)
    try:
        driver.delete_all_cookies()
        driver.set_window_size(1400, 1000)
        return process_query(url, query, client)
    finally:
        # Quit even when the search raises, so no Chrome process is left behind.
        driver.quit()


# execution starts here
def execute(url, query, client):
    """Run one search/click cycle, report timing, then pause before returning.

    Exceptions raised by the run are printed and the status code stays 0.
    KeyboardInterrupt is not caught, so Ctrl+C stops the script as the
    printed prompt says.

    Returns:
        The status code from ``run_once``, or 0 if the run raised.
    """
    print("Ctrl+C or Ctrl+Z to stop.")
    statuscode = 0
    st = timeit.default_timer()
    try:
        statuscode = run_once(url, query, client)
    except Exception:
        traceback.print_exc()
    timetaken = timeit.default_timer() - st
    print("Time taken: " + str(timetaken))
    print("Process returned with " + str(statuscode))
    if statuscode == 444:
        print("You have been caught!!!")
        #notify("Clickbot " + brands[domain] + " has been caught by Google and will terminate. IP: ")
    # Pad short runs so that run time plus pause is at least 60 seconds.
    extrasleep = 0
    if timetaken < 50:
        extrasleep = 50 - timetaken
    print("Ctrl+C or Ctrl+Z to stop now.")
    print("You have " + str(10 + extrasleep) + " seconds.")
    time.sleep(10 + extrasleep)
    return statuscode