"""Open random hhh.com.tw pages in a remote Selenium browser, forever.

URL paths are read from ``urls.csv`` (first CSV column), located by swapping
the script filename ``hhh_666_edges.py`` in ``__file__`` for ``urls.csv``.
Each iteration picks one URL at random and opens it in a new tab of the
remote WebDriver at ``http://127.0.0.1:6666/wd/hub``.  The Docker container
whose name contains ``p6666`` is restarted from time to time to get a fresh
browser session.
"""
import codecs
import os
import random
import sys
import time
import urllib.parse

import docker
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Shared WebDriver session; None means "no usable session right now".
driver = None

# NOTE(review): a live-looking bearer token is committed in source.  It is kept
# as the fallback so existing deployments keep working, but it should be
# rotated and supplied through the LINE_NOTIFY_TOKEN environment variable.
_LINE_NOTIFY_TOKEN = os.environ.get(
    "LINE_NOTIFY_TOKEN", "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2"
)

headers = {
    "Authorization": "Bearer " + _LINE_NOTIFY_TOKEN,
    "Content-Type": "application/x-www-form-urlencoded",
}


def send_msg(kw):
    """Post a notification containing keyword *kw* to the LINE Notify API."""
    params = {"message": "處理關鍵字: " + kw}
    # A timeout keeps a stalled HTTP connection from hanging the script.
    requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params=params,
        timeout=10,
    )


def empty_query(q):
    """Load the Google results page for *q* and wait three seconds."""
    googleurl = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
    driver.get(googleurl)
    time.sleep(3)


def process_query(qs):
    """Search Google and click the first result link that points at a domain.

    *qs* is indexed as ``qs[0]`` = query text, ``qs[1]`` = number of times to
    click the "next page" link first, ``qs[2]`` = substring that must appear
    in the result's ``href``.  Only links whose visible text is longer than
    10 characters are considered.
    """
    q = qs[0]
    cnt = qs[1]
    domain = qs[2]

    googleurl = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
    driver.get(googleurl)
    time.sleep(3)

    # find_element(By.XPATH, ...) works on Selenium 3 and 4; the
    # find_element_by_* helpers were removed in Selenium 4.3.
    for _ in range(cnt):
        next_link = driver.find_element(By.XPATH, "//a[@id='pnnext']")
        webdriver.ActionChains(driver).move_to_element(next_link).perform()
        webdriver.ActionChains(driver).move_to_element(next_link).click().perform()
    time.sleep(3)

    links = driver.find_elements(
        By.XPATH, "//div[@class='g']//div[@class='yuRUbf']//a"
    )
    for link in links:
        href = link.get_attribute('href')
        # href can be None for anchors without the attribute; skip those
        # instead of raising TypeError on the substring test.
        if len(link.text) > 10 and href and domain in href:
            webdriver.ActionChains(driver).move_to_element(link).perform()
            webdriver.ActionChains(driver).move_to_element(link).click().perform()
            break


def re_get_webdriver():
    """Restart the browser container(s) and open a fresh WebDriver session.

    Every running container whose name contains ``p6666`` is restarted, then
    after a 4 second pause a new remote session is created and stored in the
    module-level ``driver``.  On any failure ``driver`` is set to None.
    """
    global driver
    client = docker.from_env()
    containers = client.containers.list()
    print(containers)
    for container in containers:
        if 'p6666' in container.name:
            # Restart the container that matched, not whichever is first
            # in the list.
            container.restart()
    time.sleep(4)

    try:
        session = webdriver.Remote(
            command_executor='http://127.0.0.1:6666/wd/hub')
        session.set_window_size(1400, 1000)
    except Exception as exc:
        # Best effort: the caller checks for None and skips this round.
        print('webdriver unavailable:', exc)
        driver = None
        return
    driver = session


def run_once(url):
    """Open *url* in a new browser tab, scroll down 400px, wait one second.

    The session is re-created when none exists, and also at random
    (when ``randint(0, 20) <= 3``).  Does nothing if no session can be
    obtained.
    """
    if random.randint(0, 20) <= 3 or driver is None:
        re_get_webdriver()
    if driver is None:
        return
    # Pass the URL as a script argument rather than splicing it into the
    # JavaScript source, so quotes or newlines in it cannot break the script.
    driver.execute_script('window.open(arguments[0], "_blank");', url)
    # NOTE(review): no window switch happens here, so this scroll presumably
    # runs in the original window rather than the new tab - confirm intent.
    driver.execute_script("window.scrollTo(0, window.scrollY + 400)")
    time.sleep(1)


def _load_urls(path):
    """Return full hhh.com.tw URLs built from the first CSV column of *path*."""
    urls = []
    with open(path, encoding='utf-8') as fr:
        for line in fr:
            # Strip so a single-column row does not carry its newline into
            # the URL, and skip blank rows entirely.
            first = line.split(',')[0].strip()
            if first:
                urls.append('https://www.hhh.com.tw' + first)
    return urls


def main():
    """Load the URL list and visit random entries until interrupted."""
    fpath = __file__.replace('hhh_666_edges.py', 'urls.csv')
    print(fpath)
    lst = _load_urls(fpath)
    if not lst:
        raise SystemExit('no URLs found in ' + fpath)
    while True:
        target = random.choice(lst)
        print(target)
        run_once(target)


if __name__ == "__main__":
    main()