12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 |
- import traceback
- from selenium import webdriver
- from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
- import time
- import os
- from selenium.webdriver.common.keys import Keys
- import datetime
- import urllib.parse
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support import expected_conditions as EC
- import codecs
- import random
- from bs4 import BeautifulSoup
- import requests
- import time
- # import rpyc
- import sys
- import docker
- # import googlesearch
- import codecs
- import sys
- import time
- import dataset
- import os
- import html2text
- from userAgentRandomizer import userAgents
def restart_browser(container='proxy1',
                    proxy='socks5://127.0.0.1:9050',
                    settle_seconds=8):
    """Restart the proxy container and return a fresh Chrome driver.

    The Docker container is restarted first, then a new incognito Chrome
    session is started that routes traffic through ``proxy`` and presents
    a randomly chosen User-Agent string.

    Args:
        container: Name of the Docker container to restart.
        proxy: Value passed to Chrome's ``--proxy-server`` switch.
        settle_seconds: Pause between restarting the container and
            launching Chrome (presumably to let the proxy come back up --
            confirm against the container's actual start-up time).

    Returns:
        A ``webdriver.Chrome`` instance sized 1400x1000 with all cookies
        deleted.
    """
    # Local imports keep this block self-contained regardless of the
    # file-level import list.
    import subprocess
    import sys

    # Argument list instead of a shell string: no shell parsing of the
    # container name. Failure stays non-fatal, as with the original
    # os.system call, but is now reported instead of silently ignored.
    try:
        result = subprocess.run(
            ['docker', 'container', 'restart', container],
            check=False,
        )
    except OSError as exc:
        print(f'could not run docker: {exc}', file=sys.stderr)
    else:
        if result.returncode != 0:
            print(
                f'docker container restart {container} exited with '
                f'status {result.returncode}',
                file=sys.stderr,
            )

    ua = userAgents()
    user_agent = ua.random()
    time.sleep(settle_seconds)

    options = webdriver.ChromeOptions()
    # options.add_argument("--headless")
    options.add_argument(f'--proxy-server={proxy}')
    options.add_argument("--user-agent=" + user_agent)
    options.add_argument("--incognito")

    driver = webdriver.Chrome(options=options)
    driver.set_window_size(1400, 1000)
    driver.delete_all_cookies()
    return driver
# Search Google News (Taiwan edition) for one randomly chosen headline and
# print the link and text of the first result.
driver = restart_browser()
try:
    driver.get('https://news.google.com/topstories?hl=zh-TW&gl=TW&ceid=TW:zh-Hant')
    time.sleep(7)  # fixed wait for the page to render

    # find_element raises NoSuchElementException when nothing matches; it
    # never returns None, so no None check is needed afterwards.
    elmt = driver.find_element(By.XPATH, "//input[@aria-label='搜尋']")

    title_lst = [
        '《咒術迴戰》降臨全家!不只推出獨家必收集點周邊 還能在1:1還原名場景與主角合照 引新聞',
        '梅雨季正是驗屋好時機!專家分享小撇步教你避免買到漏水屋 引新聞',
        '5月最夯球鞋款式大公開!女性消費者最愛「這個色調」 引新聞',
        '有影/本田仁美加入AKB48八年首登C位驚呼夢想成真!賣力學中文想挑戰翻唱《那些年》 引新聞',
        '萬綠叢中一點紅!白石麻衣化身「自衛隊」女教官 加入町田啓太「肉體派」新劇養眼陣容 引新聞',
        '超商變身辦公室!7-ELEVEN首創付費「多功能包廂專區」 遠距辦公上課更「便」民、開幕5折優惠 引新聞',
    ]
    title = random.choice(title_lst)

    elmt.send_keys(title)
    # ENTER is sent twice, as in the original flow.
    # NOTE(review): presumably the first ENTER is sometimes consumed by
    # the suggestion dropdown -- confirm before removing the second one.
    elmt.send_keys(Keys.ENTER)
    elmt.send_keys(Keys.ENTER)
    time.sleep(7)  # fixed wait for the result list to load

    elmts = driver.find_elements(
        By.XPATH,
        "//div[@jsname='esK7Lc']//div[@class='xrnccd']//a[@jsname='hXwDdf']",
    )
    if elmts:
        print(elmts[0].get_attribute('href'))
        print(elmts[0].text)
    else:
        # find_elements returns an empty list when nothing matches;
        # indexing it unconditionally would raise IndexError.
        print('no search results matched the result XPath', file=sys.stderr)
    time.sleep(9)
finally:
    # Always shut the browser down so Chrome/chromedriver processes are
    # not left running when a step above fails.
    driver.quit()
|