from_news.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. #import redis
  2. import time
  3. import traceback
  4. #import json
  5. from selenium import webdriver
  6. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  7. import time
  8. import os
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import dataset
  13. from selenium.webdriver.common.keys import Keys
  14. import json
  15. import random
  16. import time
  17. import redis
  18. import sys
  19. import codecs
  20. import random
  21. import os
  22. import time
  23. import requests
  24. from time import sleep
  25. #from wrapt_timeout_decorator import *
  26. import pymysql
  27. pymysql.install_as_MySQLdb()
  28. driver=None
  29. # db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  30. headers = {
  31. "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
  32. "Content-Type": "application/x-www-form-urlencoded"
  33. }
  34. def send_msg(kw):
  35. params = {"message":kw}
  36. # r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
  37. #@timeout(20)
  38. def re_get_webdriver():
  39. global port
  40. global driver
  41. result=[]
  42. if driver is not None:
  43. print('closing....')
  44. driver.quit()
  45. os.system('killall chrome')
  46. print('quit....')
  47. driver=None
  48. try:
  49. options = webdriver.ChromeOptions()
  50. options.add_argument("--no-sandbox")
  51. options.add_argument("--disable-dev-shm-usage")
  52. # options.add_argument("--headless")
  53. # options.add_argument('--proxy-server='+proxy)
  54. # options.add_argument('--proxy-server=socks5://172.104.93.163:41800')
  55. # options.add_argument('--proxy-server=socks5://127.0.0.1:9050')
  56. # options.add_argument('--proxy-server=%s' % proxy)
  57. # print('--proxy-server=%s' % proxy)
  58. # options.add_experimental_option("debuggerAddress", '127.0.0.1:9922')
  59. # options.add_argument("--user-agent=" +user_agent)
  60. options.add_argument("--incognito")
  61. driver=None
  62. try:
  63. # driver = webdriver.Chrome(options=options)
  64. if os.name=='nt':
  65. driver = webdriver.Chrome(executable_path='C:/portable/webdriver/chrome102/chromedriver.exe',options=options)
  66. else:
  67. # driver = webdriver.Chrome(options=options)
  68. driver = webdriver.Chrome(options=options,executable_path='/Users/zooeytsai/Downloads/chromedriver 4')
  69. except:
  70. traceback.print_exc()
  71. driver.quit()
  72. sys.exit()
  73. return
  74. driver.set_window_size(1400,1000)
  75. return
  76. except:
  77. traceback.print_exc()
  78. driver=None
  79. return None
  80. def run_once(jsobj):
  81. # table=db['rank_detection']
  82. print(jsobj)
  83. global driver
  84. kw=jsobj['kw']
  85. i=100
  86. if driver is None:
  87. re_get_webdriver()
  88. if driver is None:
  89. return
  90. driver.get('https://news.google.com/topstories?hl=zh-TW&gl=TW&ceid=TW:zh-Hant')
  91. time.sleep(5)
  92. elmt = driver.find_element(By.XPATH, "//input[contains(@aria-label,'搜尋')]")
  93. print(elmt)
  94. time.sleep(1)
  95. elmt.send_keys(kw)
  96. elmt.send_keys(Keys.ENTER)
  97. time.sleep(7)
  98. elmts = driver.find_elements(By.XPATH, "//main//div[@class='xrnccd']//article/a[contains(@href,'./articles/')]")
  99. print(len(elmts))
  100. elmt=elmts[-1]
  101. print(elmt)
  102. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  103. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  104. time.sleep(8)
  105. driver.quit()
  106. sys.exit()
  107. par1='tiny3'
  108. port='9942'
  109. os.system('killall chrome')
  110. #os.system('docker container restart proxy1')
  111. time.sleep(8)
  112. #os.system('curl --socks5 choozmo:choozmo9@172.104.93.163:41800 http://www.google.com')
  113. time.sleep(3)
  114. # r=random.randint(0,1)
  115. r=1
  116. if r==0:
  117. prefix="site:hhh.com.tw "
  118. domain=['hhh.com.tw']
  119. positive=['北歐 風格 定義','','房 間隔 局','小 房間 設計','裝潢 費用','室內 裝修','設計 公司','透 天 裝潢','鄉村 風 裝潢']
  120. elif r==1:
  121. prefix="site:gs-rack.com "
  122. domain=['gs-rack.com']
  123. positive=['神助','移動模組','移動貨架','無軌道','創新設計','專利移動櫃','重型架','物流設備','客戶實績','儲存方案','客戶服務','平台移動','重型電動','分離式','重型移動','冷庫重型移動','防爆倉','專利移動模組','輕中型移動貨架','傳統後推','少量多樣','大量少樣','重型貨品']
  124. elif r==2:
  125. prefix="基金 "
  126. postfix=' site:ltn.com.tw'
  127. domain=['ltn.com.tw']
  128. positive=['對沖基金','俄烏戰爭','美國升息']
  129. kw=random.choice(positive)
  130. run_once({'domain':domain,'kw':kw+prefix})