| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 | 
							- import traceback
 
- from selenium import webdriver
 
- from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 
- import time
 
- import os
 
- from selenium.webdriver.common.keys import Keys
 
- import datetime
 
- import urllib.parse
 
- from selenium.webdriver.support.ui import WebDriverWait
 
- from selenium.webdriver.common.by import By
 
- from selenium.webdriver.support import expected_conditions as EC
 
- import codecs
 
- import random
 
- from bs4 import BeautifulSoup
 
- import requests
 
- import time
 
- # import rpyc
 
- import sys
 
- import docker
 
- # import googlesearch
 
- import codecs
 
- import sys
 
- import time
 
- import dataset
 
- import os
 
- import html2text
 
- from userAgentRandomizer import userAgents
 
def restart_browser(container='proxy1', proxy='socks5://127.0.0.1:9050',
                    settle_seconds=8, headless=False):
    """Restart the proxy container and return a fresh incognito Chrome driver.

    The Docker container is restarted first, then a random user agent is
    picked, and after a fixed pause a new Chrome session is started that
    routes its traffic through ``proxy``.

    Args:
        container: Name of the Docker container to restart. Presumably this
            is the container that serves the SOCKS proxy -- confirm against
            the deployment.
        proxy: Value passed to Chrome's ``--proxy-server`` switch.
        settle_seconds: Seconds to sleep after the restart command before
            Chrome is launched, giving the container time to come back up.
        headless: When true, start Chrome with ``--headless``.

    Returns:
        A ``selenium.webdriver.Chrome`` instance sized 1400x1000 with all
        cookies deleted.
    """
    # Local import so the block does not depend on a new file-level import.
    import subprocess

    # An argv list avoids the shell entirely, so a container name can never
    # be interpreted as shell syntax.
    restart_cmd = ['docker', 'container', 'restart', container]
    try:
        result = subprocess.run(restart_cmd, check=False)
    except OSError as exc:
        # The restart stays best-effort (the original ignored the exit status
        # too), but a missing docker binary is now reported instead of hidden.
        print('warning: could not run docker: %s' % exc, file=sys.stderr)
    else:
        if result.returncode != 0:
            print('warning: restarting container %r exited with status %d'
                  % (container, result.returncode), file=sys.stderr)

    user_agent = userAgents().random()

    # Wait for the restarted container before the browser connects through it.
    time.sleep(settle_seconds)

    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument('--headless')
    options.add_argument('--proxy-server=' + proxy)
    options.add_argument('--user-agent=' + user_agent)
    options.add_argument('--incognito')

    driver = webdriver.Chrome(options=options)
    driver.set_window_size(1400, 1000)
    driver.delete_all_cookies()
    return driver
 
# Search the zh-TW edition of Google News for one randomly chosen headline
# and print the link and text of the first matching result.

title_lst = [
    '《咒術迴戰》降臨全家!不只推出獨家必收集點周邊 還能在1:1還原名場景與主角合照 引新聞',
    '梅雨季正是驗屋好時機!專家分享小撇步教你避免買到漏水屋 引新聞',
    '5月最夯球鞋款式大公開!女性消費者最愛「這個色調」 引新聞',
    '有影/本田仁美加入AKB48八年首登C位驚呼夢想成真!賣力學中文想挑戰翻唱《那些年》 引新聞',
    '萬綠叢中一點紅!白石麻衣化身「自衛隊」女教官 加入町田啓太「肉體派」新劇養眼陣容 引新聞',
    '超商變身辦公室!7-ELEVEN首創付費「多功能包廂專區」 遠距辦公上課更「便」民、開幕5折優惠 引新聞',
]
title = random.choice(title_lst)

driver = restart_browser()
try:
    driver.get('https://news.google.com/topstories?hl=zh-TW&gl=TW&ceid=TW:zh-Hant')
    time.sleep(7)  # fixed pause to let the page finish rendering

    # find_element raises NoSuchElementException when nothing matches; it never
    # returns None, so no None check is needed before using the element.
    elmt = driver.find_element(By.XPATH, "//input[@aria-label='搜尋']")

    elmt.send_keys(title)
    # ENTER is sent twice, exactly as before.
    # NOTE(review): presumably the first keypress only dismisses the
    # suggestion list -- confirm.
    elmt.send_keys(Keys.ENTER)
    elmt.send_keys(Keys.ENTER)
    time.sleep(7)  # fixed pause to let the search results load

    elmts = driver.find_elements(
        By.XPATH,
        "//div[@jsname='esK7Lc']//div[@class='xrnccd']//a[@jsname='hXwDdf']",
    )
    if elmts:
        print(elmts[0].get_attribute('href'))
        print(elmts[0].text)
    else:
        # find_elements returns an empty list when nothing matches; report it
        # instead of failing with IndexError on elmts[0].
        print('No search result found for: ' + title, file=sys.stderr)

    time.sleep(9)
finally:
    # Always close the browser and its driver process, even after an error.
    driver.quit()
 
 
  |