| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287 | 
							- #import redis
 
- import time
 
- import traceback
 
- #import json
 
- from selenium import webdriver
 
- from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 
- import time
 
- import urllib
 
- import os
 
- from selenium.webdriver.support.ui import WebDriverWait
 
- from selenium.webdriver.common.by import By
 
- from selenium.webdriver.support import expected_conditions as EC
 
- import dataset
 
- from selenium.webdriver.common.keys import Keys
 
- import json
 
- import random
 
- import time
 
- import redis
 
- import sys
 
- import codecs
 
- import random
 
- import os
 
- import time
 
- import requests
 
- import datetime
 
from fake_useragent import UserAgent

# Most recently created WebDriver session; re_get_webdriver() rebinds this
# global each time it opens a new session.
driver = None

# Shared User-Agent generator; re_get_webdriver() reads ``ua.random`` for
# every new browser session.
ua = UserAgent()
 
- #proxy_enabled=True
 
- # proxy_enabled=False
 
- # # https://youtu.be/cR2M5Khgxvc
 
- # db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
 
- # glog_table=db['general_log']
 
def re_get_webdriver():
    """Open a new Chrome session on the local Selenium hub and return it.

    The hub address is built from the module-level ``portnum`` and the
    User-Agent string comes from the module-level ``ua`` generator.  The new
    session is also stored in the module-level ``driver``.

    Returns:
        The ``webdriver.Remote`` instance that was just created.

    Any exception raised while creating the session is passed on to the
    caller unchanged.
    """
    global driver

    chrome_options = webdriver.ChromeOptions()
    for flag in (
        "--user-agent=" + ua.random,
        "--no-sandbox",
        "--headless",
        "--incognito",
    ):
        chrome_options.add_argument(flag)

    hub_url = f'http://127.0.0.1:{portnum}/wd/hub'
    driver = webdriver.Remote(command_executor=hub_url, options=chrome_options)
    return driver
 
-     # try:
 
-     #     options = webdriver.ChromeOptions()
 
-         
 
-     #     options.add_argument("--no-sandbox")
 
-     #     options.add_argument("--headless")
 
-     #     options.add_argument("--incognito")
 
-     #     # if proxy_enabled:
 
-     #     #     options.add_argument('--proxy-server=socks5://172.104.92.245:14900')
 
-     #     try:
 
-     #         driver = webdriver.Remote(
 
-     #             command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
 
-     #         options=options)
 
-     #     except:
 
-     #         traceback.print_exc()
 
-     #         return None
 
-     #     return driver
 
-     # except:
 
-     #     traceback.print_exc()
 
-     #     driver=None
 
-     #     return None
 
-     # return driver
 
# Article pages visited, in this order, on every call to run_once().
_ETTODAY_URLS = (
    'https://house.ettoday.net/news/1492047',
    'https://house.ettoday.net/news/1492167',
    'https://house.ettoday.net/news/1492288',
    'https://house.ettoday.net/news/1492178',
    'https://house.ettoday.net/news/1492229',
    'https://house.ettoday.net/news/1492134',
    'https://house.ettoday.net/news/1492240',
    'https://house.ettoday.net/news/1492161',
    'https://house.ettoday.net/news/1492168',
    'https://house.ettoday.net/news/1492217',
)

# Link inside the article page that each visit hovers over and then clicks.
_NEXT_LINK_XPATH = '//*[@id="house"]/div[3]/div[2]/div[6]/div/div/div[1]/article/div/div[3]/p[1]/a'


def _restart_selenium_container():
    """Recreate the ``p8816`` Selenium container on a newly chosen host port."""
    global portnum

    portnum = random.randint(4555, 4666)
    print(portnum)
    os.system('docker container stop p8816')
    time.sleep(5)
    os.system('docker container rm p8816')
    time.sleep(5)
    os.system(f'docker run -d -p {portnum}:4444 --name p8816 --dns 168.95.1.1 selenium/standalone-chrome:101.0')
    # Pause before the caller tries to open a session on the new container.
    time.sleep(5)


def _quit_quietly(session):
    """Quit *session*; report a failure to quit instead of raising it."""
    try:
        session.quit()
    except Exception:
        # A session that is already gone must not abort the whole run.
        traceback.print_exc()


def run_once():
    """Visit every URL in ``_ETTODAY_URLS`` once and click its article link.

    For each URL a fresh browser session is requested from
    ``re_get_webdriver()``.  If that fails, the Selenium container is
    recreated on a new port, the module-level ``bok`` counter is incremented,
    ``count`` is reset to 0 and the session request is retried once; if the
    retry also fails the URL is skipped.  On a successful click ``count`` is
    incremented.  A failed visit prints a ``wrong`` line and waits 5 seconds.
    The session is always quit before moving on to the next URL.

    Only ``Exception`` subclasses are treated as recoverable, so
    ``KeyboardInterrupt`` and ``SystemExit`` still stop the script.

    Returns:
        None.
    """
    global count
    global bok

    for url in _ETTODAY_URLS:
        try:
            driver = re_get_webdriver()
        except Exception:
            _restart_selenium_container()
            bok += 1
            count = 0
            try:
                driver = re_get_webdriver()
            except Exception:
                # The recreated container is still unreachable: report it and
                # move on; the next URL triggers another restart attempt.
                traceback.print_exc()
                print("wrong", url, ';borken_time:', bok)
                time.sleep(5)
                continue

        time.sleep(3)
        try:
            driver.get(url)
            time.sleep(3)
            link = driver.find_element(By.XPATH, _NEXT_LINK_XPATH)
            # Hover first, wait, then hover again and click.
            webdriver.ActionChains(driver).move_to_element(link).perform()
            time.sleep(3)
            webdriver.ActionChains(driver).move_to_element(link).click().perform()
            print("cick!", url)
            count += 1
            print("count_time:", count, ';borken_time:', bok)
            time.sleep(random.randint(3, 7))
        except Exception:
            print("wrong", url, ';borken_time:', bok)
            time.sleep(5)
        finally:
            # Runs on success, on failure and on interruption, exactly once.
            _quit_quietly(driver)
 
-     # except:
 
-     #     print('wrong for:',i)
 
-         # kw=jsobj['kw']
 
-         # if jsobj.get('domain') is None:
 
-         #     exclude=jsobj['exclude']
 
-         #     domain=None
 
-         # else:
 
-         #     domain=jsobj['domain']
 
-         #     exclude=None
 
- #         driver.get('https://www.google.com?num=100')
 
- #         time.sleep(17)
 
- #         while True:
 
- #             try:
 
- #                 print(driver.current_url)
 
- #                 break
 
- #             except:
 
- #                 traceback.print_exc()
 
- #                 driver=re_get_webdriver()
 
- #                 time.sleep(3)
 
- #                 driver.get('https://www.google.com?num=100')
 
- #                 time.sleep(3)
 
- #             time.sleep(3)
 
- #         elmt = driver.find_element(By.XPATH, "//input[@name='q']")
 
- #         time.sleep(1)
 
- #         elmt.send_keys(kw)
 
- #         elmt.send_keys(Keys.ENTER)
 
- #         time.sleep(6)
 
- #         elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
 
- #         numresults=len(elmts)
 
- # #        time.sleep(9999)
 
- #         print('搜尋結果數量',numresults)
 
- #         if numresults==0:
 
- #             print(driver.current_url)
 
- #             print(driver.title)
 
- #             sys.exit()
 
- #         idx=1
 
- #         found=False
 
- #         test_lst=[]
 
- #         for elmt in elmts:
 
- #             href=elmt.get_attribute('href')
 
- #             txt=elmt.text
 
- #             if len(txt)>10:
 
- #                 if domain is not None:
 
- #                     for d in domain:
 
- #                         if d in href:
 
- #                             print('found....')
 
- #                             print('clicked....')
 
- #                             print(href)
 
- #                             print(txt)
 
- #                             print("ranking", idx)
 
- #                             found=True
 
- #                             webdriver.ActionChains(driver).move_to_element(elmt).perform()
 
- # #                            elmt.click()
 
- #                             webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
 
- #                             table.insert({'kw':kw,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now(),'result':numresults,'client':'64G'})
 
- #                             time.sleep(6)
 
- #                             return
 
- #                 else:
 
- #                     if exclude not in href:
 
- #                         test_lst.append(elmt)
 
-                     
 
- #             idx+=1
 
- #         if exclude is not None:
 
- #             print('exclude')
 
- #             elmt=random.choice(test_lst)
 
- #             print(elmt)
 
- #             webdriver.ActionChains(driver).move_to_element(elmt).perform()
 
- #             webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
 
- #             time.sleep(5)
 
- #         if not found:
 
- #             table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄'})
 
- #     except:
 
- #         traceback.print_exc()
 
- #         print('exception')
 
-         # traceback.print_exc()
 
-         
 
- # time.sleep(5)
 
- # r=random.randint(0,27)
 
- # r=26
 
- # cursor=db.query('select json from seo_jobs where cust="KNIGHT" and plan="形象SEO" order by rand() limit 1')
 
- # for c in cursor:
 
- #     js=json.loads(c['json'])
 
- #     prefix=js['prefix']
 
- #     postfix=js['postfix']
 
- #     domain=js['domain'][0]
 
- #     positive=js['positive']
 
- #     rnd=js['rnd']
 
# Host port that the Selenium container's port 4444 is published on;
# re_get_webdriver() reads it to build the hub URL.
portnum = random.randint(4555, 4666)
print(portnum)

# Counters shared with run_once(): ``bok`` is incremented on each container
# restart, ``count`` on each successful click.
bok = 0
count = 0

# Replace any existing ``p8816`` container with a new one, pausing 5 seconds
# after each Docker command.
os.system('docker container stop p8816')
time.sleep(5)
os.system('docker container rm p8816')
time.sleep(5)
os.system(f'docker run -d -p {portnum}:4444 --name p8816 --dns 168.95.1.1 selenium/standalone-chrome:101.0')
time.sleep(5)

# Repeat the visit pass indefinitely.
while True:
    run_once()
 
- # kw=random.choice(positive)
 
- # kw2=random.choice(rnd)
 
- # count=0
 
- # while True:
 
- #     try:
 
- #         run_once({'domain':domain,'kw':prefix+" "+kw+" "+kw2})
 
- #         count+=1
 
- #     except:
 
- #         continue
 
- #     print('中場休息 次數',count)
 
- #     time.sleep(random.randint(120,150))
 
 
  |