"""Open a Google results page in headless Chrome, click the first result whose
URL contains a target domain, and record the click in the ``nda_log`` table.

The script runs on import/execution: it connects to the database, builds one
job and calls :func:`run_once`, which terminates the process when it is done.
"""
import codecs
import datetime
import json
import os
import random
import sys
import time
import traceback

import dataset
import pandas as pd
import pymysql
import redis
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from userAgentRandomizer import userAgents

# Must run before dataset.connect() so the 'mysql://' URL resolves to PyMySQL.
pymysql.install_as_MySQLdb()

# Shared browser instance; created lazily by re_get_webdriver().
driver = None

# NOTE(review): the database password and the LINE Notify token below are
# hard-coded in source. Consider loading them from the environment instead.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
headers = {
    "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
    "Content-Type": "application/x-www-form-urlencoded",
}


def send_msg(kw):
    """Post ``kw`` as the ``message`` parameter to the LINE Notify endpoint.

    Args:
        kw: Text of the notification.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    params = {"message": kw}
    # A timeout keeps a stalled notification from hanging the whole run.
    requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params=params,
        timeout=10,
    )


def re_get_webdriver():
    """(Re)create the global headless Chrome ``driver``.

    Any existing driver is quit first and stray ``chrome`` processes are
    killed via ``killall``. On any failure the traceback is printed and the
    global ``driver`` is left as ``None``; callers must check for that.

    Returns:
        None. The result is delivered through the global ``driver``.
    """
    global driver
    if driver is not None:
        print('closing....')
        driver.quit()
        os.system('killall chrome')
        print('quit....')
        driver = None
    try:
        ua = userAgents()
        user_agent = ua.random()
        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--headless")
        # NOTE(review): the random user agent is only printed; it is never
        # passed to Chrome. Confirm whether it should be set on `options`.
        print(user_agent)
        options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})
        options.add_argument("--incognito")
        driver = None
        try:
            if os.name == 'nt':
                driver = webdriver.Chrome(options=options)
            else:
                # NOTE(review): this is the non-Windows branch yet it points at
                # a Windows chromedriver path, and `executable_path` is a
                # deprecated keyword in Selenium 4 (Service is the
                # replacement). Confirm the intended platform before changing.
                driver = webdriver.Chrome(
                    executable_path=r'C:\Users\Administrator\Downloads\chromedriver_107\chromedriver',
                    options=options,
                )
        except Exception:
            traceback.print_exc()
            return None
        driver.delete_all_cookies()
        # Very tall window so that many results are laid out at once.
        driver.set_window_size(950, 20000)
        return None
    except Exception:
        traceback.print_exc()
        driver = None
        return None


def scrolling(driver, pgnum):
    """Send PAGE_DOWN to the page body ``pgnum`` times.

    Args:
        driver: WebDriver whose current page is scrolled.
        pgnum: Number of PAGE_DOWN key presses. When greater than 1, a 0.3 s
            pause follows each press.
    """
    body = driver.find_element("css selector", 'body')
    for _ in range(pgnum):
        body.send_keys(Keys.PAGE_DOWN)
        if pgnum > 1:
            time.sleep(0.3)


def _search_and_click(jsobj, table):
    """Load the job's URL and click the first result containing the domain.

    The target domain comes from ``jsobj['domain']``; if absent, a
    module-level ``domain`` variable is used when one exists. Without a
    domain nothing is clicked.
    """
    kw = jsobj['kw']
    url = jsobj['url']
    domain = jsobj.get('domain') or globals().get('domain')

    driver.get(url)
    time.sleep(6)
    scrolling(driver, 10)
    time.sleep(20)
    elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
    numresults = len(elmts)
    print('搜尋結果數量', numresults)
    time.sleep(20)

    if numresults == 0:
        # No result anchors matched the XPath; notify and stop.
        send_msg('stop working...')
        return
    if not domain:
        print('no target domain given; nothing to click')
        return

    for idx, elmt in enumerate(elmts, start=1):
        href = elmt.get_attribute('href')
        if not href or domain not in href:
            continue
        # Read the title before clicking; the element may not survive the
        # navigation that the click triggers.
        txt = elmt.text
        print(href)
        print("ranking", idx)
        webdriver.ActionChains(driver).move_to_element(elmt).perform()
        webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
        print('clicked....')
        table.insert({
            'ranking': idx,
            'kw': kw,
            'results': numresults,
            'url': href,
            'title': txt,
            'dt': datetime.datetime.now(),
            'client': 'i5',
        })
        time.sleep(6)
        db.close()
        break


def run_once(jsobj):
    """Run one search-and-click job, then quit the browser and exit.

    Args:
        jsobj: Job dict. Requires ``'kw'`` (keyword stored with the log row)
            and ``'url'`` (results page to open); ``'domain'`` optionally
            names the substring a result URL must contain to be clicked.

    Returns:
        None, and only when no browser could be started. In every other case
        the function quits the browser and ends the process via
        ``sys.exit()``.
    """
    table = db['nda_log']
    print(jsobj)
    if driver is None:
        time.sleep(8)
        re_get_webdriver()
    if driver is None:
        return
    try:
        _search_and_click(jsobj, table)
    except Exception:
        print('exception')
        traceback.print_exc()
    finally:
        # Always release the browser, including on KeyboardInterrupt.
        driver.quit()
    sys.exit()


# Location-specific URL tails. The first entry is the one used verbatim at the
# end of `url` below; the list is only consumed by the disabled block below.
city_lst = [
    "w+CAIQICIWTmV3IFlvcmssVW5pdGVkIFN0YXRlcw&gws_rd=cr",
    "w+CAIQICIYV2FzaGluZ3RvbixVbml0ZWQgU3RhdGVz&gws_rd=cr",
    "w+CAIQICIbTWlhbWksRmxvcmlkYSxVbml0ZWQgU3RhdGVz&gws_rd=cr",
    "w+CAIQICIeQ2hpY2FnbyxJbGxpbm9pcyxVbml0ZWQgU3RhdGVz&gws_rd=cr",
]

# Disabled job selection from the seo_jobs table. When enabled, it defines the
# module-level `domain` that run_once() falls back to.
# cursor=db.query('select json from seo_jobs where cust="美東" and plan="形象SEO" order by rand() limit 1')
# for c in cursor:
#     js=json.loads(c['json'])
#     prefix=js['prefix']
#     postfix=js['postfix']
#     domain=js['domain'][0]
#     positive=js['positive']
#     rnd=js['rnd']
#
# kw1=random.choice(positive)
# kw2=random.choice(rnd)
# kw=prefix+" "+kw1+" "+kw2
# city = random.choice(city_lst)

url = "https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIWTmV3IFlvcmssVW5pdGVkIFN0YXRlcw&gws_rd=cr"

# NOTE(review): this job carries no 'domain', so no result will be clicked.
# Add the target domain to the dict (or re-enable the block above).
run_once({'kw': 'angelo koo', 'url': url})