# gnews_click.py -- opens Google News (zh-TW) in a proxied Chrome session and searches for a randomly chosen headline.

import codecs
import datetime
import os
import random
import subprocess
import sys
import time
import traceback
import urllib.parse

import dataset
import docker
# import googlesearch
import html2text
import requests
# import rpyc
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from userAgentRandomizer import userAgents


  28. def restart_browser():
  29. os.system('docker container restart proxy1')
  30. ua = userAgents()
  31. user_agent = ua.random()
  32. time.sleep(8)
  33. options = webdriver.ChromeOptions()
  34. # options.add_argument("--headless")
  35. options.add_argument('--proxy-server=socks5://127.0.0.1:9050')
  36. options.add_argument("--user-agent=" +user_agent)
  37. options.add_argument("--incognito")
  38. driver=webdriver.Chrome(options=options)
  39. driver.set_window_size(1400,1000)
  40. driver.delete_all_cookies()
  41. return driver
  42. driver=restart_browser()
  43. driver.get('https://news.google.com/topstories?hl=zh-TW&gl=TW&ceid=TW:zh-Hant')
  44. time.sleep(7)
  45. elmt=driver.find_element(By.XPATH,"//input[@aria-label='搜尋']")
  46. title_lst=['《咒術迴戰》降臨全家!不只推出獨家必收集點周邊 還能在1:1還原名場景與主角合照 引新聞',
  47. '梅雨季正是驗屋好時機!專家分享小撇步教你避免買到漏水屋 引新聞',
  48. '5月最夯球鞋款式大公開!女性消費者最愛「這個色調」 引新聞',
  49. '有影/本田仁美加入AKB48八年首登C位驚呼夢想成真!賣力學中文想挑戰翻唱《那些年》 引新聞',
  50. '萬綠叢中一點紅!白石麻衣化身「自衛隊」女教官 加入町田啓太「肉體派」新劇養眼陣容 引新聞',
  51. '超商變身辦公室!7-ELEVEN首創付費「多功能包廂專區」 遠距辦公上課更「便」民、開幕5折優惠 引新聞']
  52. title=random.choice(title_lst)
  53. #if elmt is None:
  54. # elmt=driver.find_element(By.XPATH,"//input[@aria-label='搜尋']")
  55. if elmt is not None:
  56. elmt.send_keys(title)
  57. elmt.send_keys(Keys.ENTER)
  58. elmt.send_keys(Keys.ENTER)
  59. time.sleep(7)
  60. elmts=driver.find_elements(By.XPATH,"//div[@jsname='esK7Lc']//div[@class='xrnccd']//a[@jsname='hXwDdf']")
  61. print(elmts[0].get_attribute('href'))
  62. print(elmts[0].text)
  63. time.sleep(9)