"""Search Google for generated keywords with headless Chrome and click a result.

Each polling iteration loads one job row from ``seo.seo_jobs``, builds a
keyword from it, runs a Google search, clicks either the result that belongs
to the job's domain or a random non-blacklisted result, and records the
ranking of a matched result in the ``nda_log`` table.
"""
import codecs
import datetime
import json
import os
import random
import sys
import time
import traceback
import urllib.parse

import dataset
import pymysql
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

pymysql.install_as_MySQLdb()

# Shared browser handle; None whenever no browser session is open.
driver = None

# NOTE(review): the database password and the LINE Notify bearer token below
# are hard-coded in source; consider loading them from the environment.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')

headers = {
    "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
    "Content-Type": "application/x-www-form-urlencoded"
}

# Result URLs that must never be chosen by the random fallback click.
blacklist = ['https://www.chinatimes.com/realtimenews/20220613003142-260402']


def send_msg(kw):
    """Send ``kw`` as the message of a LINE Notify notification.

    The HTTP response is not inspected. A timeout is set so that a hung
    connection cannot block the caller forever; network errors propagate
    to the caller as ``requests`` exceptions.
    """
    requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params={"message": kw},
        timeout=30,
    )


def _quit_driver():
    """Quit the shared browser if one is open and clear the global handle."""
    global driver
    if driver is None:
        return
    try:
        driver.quit()
    except Exception:
        # Best effort: a failed quit must not mask the caller's own flow.
        traceback.print_exc()
    finally:
        driver = None


def re_get_webdriver():
    """(Re)create the shared headless, incognito Chrome session.

    An already-open session is quit first and any remaining ``chrome``
    processes are killed. On failure the traceback is printed and the global
    ``driver`` is left as ``None``. Always returns ``None``; callers check
    the global ``driver`` instead.
    """
    global driver
    if driver is not None:
        print('closing....')
        _quit_driver()
        os.system('killall chrome')
        print('quit....')
    try:
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument("--incognito")
        driver = webdriver.Chrome(options=options)
        driver.delete_all_cookies()
        driver.set_window_size(1400, 1000)
    except Exception:
        traceback.print_exc()
        # Do not leave a half-initialised browser running.
        _quit_driver()
    return None


def _scroll_through_page():
    """Scroll the current page from top to bottom in roughly four steps."""
    page_height = driver.execute_script("return document.body.scrollHeight")
    # A height below 4 would give a zero step and never terminate the loop.
    scroll_step = max(1, page_height // 4)
    current_height = 0
    while current_height < page_height:
        driver.execute_script(f"window.scrollTo(0, {current_height + scroll_step});")
        time.sleep(3)
        current_height += scroll_step


def run_once(jsobj):
    """Run one Google search for ``jsobj['kw']`` and click a result.

    ``jsobj`` keys read here:
      * ``'kw'``      -- the search keyword.
      * ``'domain'``  -- optional; when present, the first result whose URL
        contains it is clicked, logged to ``nda_log`` and scrolled through.
      * ``'exclude'`` -- required when ``'domain'`` is absent; results whose
        URL contains any of these substrings are skipped and a random
        remaining result (past the first five) is clicked.
      * ``'cust'``    -- stored as ``client`` in the log row of a match.

    When no result matched a domain, a random non-blacklisted result is
    clicked instead. If the search yields no result links at all, a
    notification is sent and the process exits via ``sys.exit()``. Any other
    error is printed and the browser is closed. Returns ``None``.
    """
    global driver
    table = db['nda_log']
    print(jsobj)

    if driver is None:
        time.sleep(8)
        re_get_webdriver()
    if driver is None:
        return

    try:
        kw = jsobj['kw']
        domain = jsobj.get('domain')
        exclude = jsobj['exclude'] if domain is None else None

        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
            urllib.parse.quote(kw), 100, 'zh-TW')
        driver.get(googleurl)
        time.sleep(6)
        print(driver.current_url)
        if 'sorry' in driver.current_url:
            print("URL Error: Caught")
            return

        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            send_msg('stop working...')
            # Close the browser ourselves: SystemExit is deliberately not
            # handled by the ``except Exception`` below.
            _quit_driver()
            sys.exit()

        found = False
        test_lst = []
        txt_dict = {}
        seen = []  # (element, href) pairs, reused by the random fallback
        for idx, elmt in enumerate(elmts, start=1):
            # Anchors without an href yield None; normalise for ``in`` tests.
            href = elmt.get_attribute('href') or ''
            txt = elmt.text
            seen.append((elmt, href))
            if len(txt) <= 10:
                continue
            if domain is not None:
                if domain not in href:
                    continue
                print('found....')
                print('clicked....')
                print(href)
                print("ranking", idx)
                found = True
                webdriver.ActionChains(driver).move_to_element(elmt).perform()
                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
                table.insert({'ranking': idx, 'kw': kw, 'results': numresults,
                              'url': href, 'title': txt,
                              'dt': datetime.datetime.now(),
                              'client': jsobj['cust']})
                time.sleep(5)
                _scroll_through_page()
                time.sleep(10)
                break
            if not any(ee in href for ee in exclude):
                test_lst.append(elmt)
                txt_dict[elmt] = txt

        if exclude is not None:
            print('exclude')
            # random.choice raises IndexError when fewer than six candidates
            # remain; that is reported by the handler below.
            elmt = random.choice(test_lst[5:])
            print(elmt)
            print(txt_dict[elmt])
            webdriver.ActionChains(driver).move_to_element(elmt).perform()
            elmt.click()
            time.sleep(5)

        if not found:
            # Don't waste the search: click a random link that is not
            # blacklisted. ``found`` is False only when the loop above ran to
            # completion, so ``seen`` covers every result.
            candidates = [e for e, h in seen if h not in blacklist]
            if candidates:
                pick = random.choice(candidates)
                webdriver.ActionChains(driver).move_to_element(pick).perform()
                webdriver.ActionChains(driver).move_to_element(pick).click().perform()
    except Exception:
        print('exception')
        traceback.print_exc()
        _quit_driver()


# Polling loop: one job row, one search, then a pause, forever.
while True:
    try:
        cursor = db.query('select json from seo.seo_jobs where cust="啟翔" and plan="形象SEO" and json like "%陳百欽%" order by rand() limit 1')
        for c in cursor:
            js = json.loads(c['json'])
            prefix = js['prefix']
            domain = js['domain'][0]
            positive = js['positive']
            rnd = js['rnd']
            # Re-roll until the generated keyword contains the target name.
            kw = ''
            while '陳百欽' not in kw:
                kw = random.choice(positive) + " " + prefix + " " + random.choice(rnd)
            run_once({'domain': domain, 'kw': kw, 'cust': '啟翔'})
        time.sleep(61)
        cursor = None
        # Close the browser explicitly instead of merely dropping the handle,
        # so the next iteration starts from a fresh session without leaking
        # the old one.
        _quit_driver()
    except Exception:
        # ``Exception`` (not a bare except) so SystemExit and
        # KeyboardInterrupt can actually stop the script.
        traceback.print_exc()
        print("Execution Error")
        time.sleep(20)