"""Repeatedly open a fixed list of ETtoday house-news pages and click a link.

The script (re)starts a ``selenium/standalone-chrome`` Docker container named
``p8816`` on a random host port, then loops forever: for every URL it opens a
fresh remote headless Chrome session, hovers over and clicks one link located
by a fixed XPath, and closes the session again.

NOTE: starting the container and entering the endless loop happen at module
level, exactly as in the original script, so importing this module runs it.
"""

import codecs
import datetime
import json
import os
import random
import sys
import time
import traceback
import urllib

import dataset
import redis
import requests
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Most recently created remote session (written by re_get_webdriver()).
driver = None

# Source of random User-Agent strings for each new browser session.
ua = UserAgent()

# Pages visited, in order, on every run_once() pass.
ETTODAY_URLS = (
    'https://house.ettoday.net/news/1492047',
    'https://house.ettoday.net/news/1492167',
    'https://house.ettoday.net/news/1492288',
    'https://house.ettoday.net/news/1492178',
    'https://house.ettoday.net/news/1492229',
    'https://house.ettoday.net/news/1492134',
    'https://house.ettoday.net/news/1492240',
    'https://house.ettoday.net/news/1492161',
    'https://house.ettoday.net/news/1492168',
    'https://house.ettoday.net/news/1492217',
)

# Link that is hovered and clicked on every visited page.
ARTICLE_LINK_XPATH = (
    '//*[@id="house"]/div[3]/div[2]/div[6]/div/div/div[1]'
    '/article/div/div[3]/p[1]/a'
)


def re_get_webdriver():
    """Open a new remote Chrome session and return it.

    The session is headless and incognito, runs with ``--no-sandbox`` and a
    random User-Agent, and connects to the Selenium hub at
    ``http://127.0.0.1:<portnum>/wd/hub`` (``portnum`` is a module global).
    The new session is also stored in the module-level ``driver``.

    Any exception raised by ``webdriver.Remote`` propagates to the caller.
    """
    global driver
    global portnum

    options = webdriver.ChromeOptions()
    options.add_argument("--user-agent=" + ua.random)
    options.add_argument("--no-sandbox")
    options.add_argument("--headless")
    options.add_argument("--incognito")

    driver = webdriver.Remote(
        command_executor='http://127.0.0.1:' + str(portnum) + '/wd/hub',
        options=options,
    )
    return driver


def _restart_selenium_container():
    """Pick a new random host port and recreate the ``p8816`` container.

    Updates the module-level ``portnum``. The exit status of the docker
    commands is ignored, as it was in the original inline code (stopping or
    removing a container that does not exist is expected on first start).
    """
    global portnum

    portnum = random.randint(4555, 4666)
    print(portnum)
    os.system('docker container stop p8816')
    time.sleep(5)
    os.system('docker container rm p8816')
    time.sleep(5)
    os.system('docker run -d -p ' + str(portnum) + ':4444 --name p8816 --dns 168.95.1.1 selenium/standalone-chrome:101.0')


def _quit_quietly(session):
    """Close ``session``; log but do not propagate errors raised by quit()."""
    try:
        session.quit()
    except Exception:
        # A dead session must not take the endless main loop down with it.
        traceback.print_exc()


def run_once():
    """Visit every URL in ``ETTODAY_URLS`` once, clicking the target link.

    For each URL a fresh session is opened. If opening it fails, the Selenium
    container is restarted on a new port, ``bok`` is incremented, ``count`` is
    reset to 0 and the connection is attempted once more; a failure of that
    second attempt propagates to the caller. Each successful click increments
    ``count``. The session is always closed before moving to the next URL.
    """
    global count
    global bok

    for url in ETTODAY_URLS:
        try:
            session = re_get_webdriver()
        except Exception:
            # Hub unreachable: recreate the container and retry once.
            _restart_selenium_container()
            bok += 1
            count = 0
            time.sleep(5)
            session = re_get_webdriver()
        time.sleep(3)

        succeeded = False
        try:
            session.get(url)
            time.sleep(3)
            link = session.find_element(By.XPATH, ARTICLE_LINK_XPATH)
            webdriver.ActionChains(session).move_to_element(link).perform()
            time.sleep(3)
            webdriver.ActionChains(session).move_to_element(link).click().perform()
            print("cick!", url)
            count += 1
            print("count_time:", count, ';borken_time:', bok)
            time.sleep(random.randint(3, 7))
            succeeded = True
        except Exception:
            # Exception (not a bare except) so Ctrl-C still stops the script.
            traceback.print_exc()
            print("wrong", url, ';borken_time:', bok)
        finally:
            _quit_quietly(session)

        if not succeeded:
            time.sleep(5)


# --- script start-up (runs at import time, as in the original) --------------
_restart_selenium_container()
bok = 0    # number of container restarts triggered from run_once()
count = 0  # successful clicks since the last container restart
time.sleep(5)

while True:
    run_once()