"""Visit a fixed list of ETtoday house-news pages and click a link on each.

The script drives headless Chrome through a Selenium standalone server that
runs in a local docker container.  ``main`` (re)creates the container on a
random host port and then calls ``run_once`` forever.
"""
import codecs
import datetime
import json
import os
import random
import subprocess
import sys
import time
import traceback
import urllib

import dataset
import redis
import requests
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Name of the docker container that hosts the Selenium server.
CONTAINER_NAME = 'p8817'
# Image used for the Selenium server container.
SELENIUM_IMAGE = 'selenium/standalone-chrome:110.0'
# Inclusive range the host port is drawn from on every container restart.
PORT_MIN = 4444
PORT_MAX = 4555

# XPath of the element that is hovered and clicked on every article page.
ARTICLE_LINK_XPATH = (
    '//*[@id="house"]/div[3]/div[2]/div[6]/div/div/div[1]'
    '/article/div/div[4]/p[1]/a'
)

# Pages visited, in order, by one call to run_once().
ETTODAY_URLS = [
    'https://house.ettoday.net/news/1586609',
    'https://house.ettoday.net/news/1586604',
    'https://house.ettoday.net/news/1597942',
    'https://house.ettoday.net/news/1597936',
    'https://house.ettoday.net/news/1675455',
    'https://house.ettoday.net/news/1701065',
    'https://house.ettoday.net/news/1700425',
    'https://house.ettoday.net/news/1492047',
    'https://house.ettoday.net/news/1492167',
    'https://house.ettoday.net/news/1492288',
    'https://house.ettoday.net/news/1492178',
    'https://house.ettoday.net/news/1492229',
    'https://house.ettoday.net/news/1492134',
    'https://house.ettoday.net/news/1492240',
    'https://house.ettoday.net/news/1492161',
    'https://house.ettoday.net/news/1492168',
    'https://house.ettoday.net/news/1492217',
]

# Most recently created remote browser session (set by re_get_webdriver()).
driver = None
# Source of random User-Agent strings for new browser sessions.
ua = UserAgent()

# Host port the Selenium container is published on; assigned by main() and
# re-assigned whenever the container is restarted.
portnum = None
# Number of failures recorded so far (container restarts and failed runs).
bok = 0
# Number of successful clicks since the container was last (re)started.
count = 0


def _restart_selenium_container():
    """Recreate the Selenium container on a random host port and return it.

    The container is stopped, removed and started again, with a five second
    pause after each step.  Exit statuses are ignored on purpose: ``stop`` and
    ``rm`` fail harmlessly when no such container exists yet.

    Returns:
        The host port that is mapped to the container's port 4444.
    """
    port = random.randint(PORT_MIN, PORT_MAX)
    print(port)
    # List-form arguments: no shell is involved, so nothing needs quoting.
    subprocess.run(['docker', 'container', 'stop', CONTAINER_NAME], check=False)
    time.sleep(5)
    subprocess.run(['docker', 'container', 'rm', CONTAINER_NAME], check=False)
    time.sleep(5)
    subprocess.run(
        [
            'docker', 'run', '-d',
            '-p', f'{port}:4444',
            '--name', CONTAINER_NAME,
            '--shm-size=500M',
            '--dns', '168.95.1.1',
            SELENIUM_IMAGE,
        ],
        check=False,
    )
    # Pause before the first connection attempt (presumably to let the
    # Selenium server inside the container finish starting).
    time.sleep(5)
    return port


def _quit_quietly(session):
    """Close *session*, reporting instead of raising if that fails."""
    try:
        session.quit()
    except Exception:
        print('no have driver')


def re_get_webdriver():
    """Open a new headless, incognito Chrome session and return it.

    The session is created on the Selenium server at
    ``http://127.0.0.1:<portnum>/wd/hub`` with a random User-Agent.  The new
    session is also stored in the module-level ``driver``.

    Raises:
        Whatever ``webdriver.Remote`` raises when the session cannot be
        created (for example when the server is not reachable).
    """
    global driver
    global portnum

    options = webdriver.ChromeOptions()
    options.add_argument("--user-agent=" + ua.random)
    options.add_argument("--no-sandbox")
    options.add_argument("--headless")
    options.add_argument("--incognito")
    driver = webdriver.Remote(
        command_executor='http://127.0.0.1:' + str(portnum) + '/wd/hub',
        options=options,
    )
    return driver


def run_once():
    """Visit every URL in ETTODAY_URLS once, clicking the article link.

    A fresh browser session is used per URL.  If a session cannot be created,
    the Selenium container is restarted on a new port, ``bok`` is incremented,
    ``count`` is reset and the session is requested again; a second failure
    propagates to the caller.  A failure while loading or clicking a page is
    reported and the loop moves on to the next URL.
    """
    global count
    global portnum
    global bok

    for url in ETTODAY_URLS:
        try:
            session = re_get_webdriver()
        except Exception:
            portnum = _restart_selenium_container()
            bok += 1
            count = 0
            session = re_get_webdriver()
        time.sleep(3)

        try:
            session.get(url)
            time.sleep(3)
            link = session.find_element(By.XPATH, ARTICLE_LINK_XPATH)
            # Hover first, pause, then hover-and-click.
            webdriver.ActionChains(session).move_to_element(link).perform()
            time.sleep(3)
            webdriver.ActionChains(session).move_to_element(link).click().perform()
            print("cick!", url)
            count += 1
            print("count_time:", count, ';broken_time:', bok)
            time.sleep(random.randint(3, 7))
        except Exception:
            # Exception (not a bare except) so Ctrl-C still stops the script.
            print("wrong", url, ';broken_time:', bok)
            time.sleep(5)
        finally:
            # Always release the remote session, even when interrupted.
            _quit_quietly(session)


def main():
    """Start the Selenium container and call run_once() forever."""
    global portnum
    global bok
    global count

    portnum = _restart_selenium_container()
    bok = 0
    count = 0
    while True:
        try:
            run_once()
        except Exception:
            # KeyboardInterrupt/SystemExit are deliberately not caught here,
            # otherwise this loop could never be stopped from the terminal.
            bok += 1
            print('broken')
            traceback.print_exc()
            time.sleep(5)


if __name__ == "__main__":
    main()