# Source: from_news.py (4.4 KB)
  1. #import redis
  2. import time
  3. import traceback
  4. #import json
  5. from selenium import webdriver
  6. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  7. import time
  8. import os
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import dataset
  13. from selenium.webdriver.common.keys import Keys
  14. import json
  15. import random
  16. import time
  17. import redis
  18. import sys
  19. import codecs
  20. import random
  21. import os
  22. import time
  23. import requests
  24. from time import sleep
  25. #from wrapt_timeout_decorator import *
  26. driver=None
  27. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  28. headers = {
  29. "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
  30. "Content-Type": "application/x-www-form-urlencoded"
  31. }
  32. def send_msg(kw):
  33. params = {"message":kw}
  34. r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
  35. #@timeout(20)
  36. def re_get_webdriver():
  37. global port
  38. global driver
  39. result=[]
  40. if driver is not None:
  41. print('closing....')
  42. driver.quit()
  43. os.system('killall chrome')
  44. print('quit....')
  45. driver=None
  46. try:
  47. options = webdriver.ChromeOptions()
  48. options.add_argument("--no-sandbox")
  49. options.add_argument("--disable-dev-shm-usage")
  50. # options.add_argument("--headless")
  51. # options.add_argument('--proxy-server='+proxy)
  52. # options.add_argument('--proxy-server=socks5://172.104.93.163:41800')
  53. # options.add_argument('--proxy-server=socks5://127.0.0.1:9050')
  54. # options.add_argument('--proxy-server=%s' % proxy)
  55. # print('--proxy-server=%s' % proxy)
  56. # options.add_experimental_option("debuggerAddress", '127.0.0.1:9922')
  57. # options.add_argument("--user-agent=" +user_agent)
  58. options.add_argument("--incognito")
  59. driver=None
  60. try:
  61. # driver = webdriver.Chrome(options=options)
  62. if os.name=='nt':
  63. driver = webdriver.Chrome(executable_path='C:/portable/webdriver/chrome102/chromedriver.exe',options=options)
  64. else:
  65. driver = webdriver.Chrome(options=options)
  66. except:
  67. traceback.print_exc()
  68. driver.quit()
  69. sys.exit()
  70. return
  71. driver.set_window_size(1400,1000)
  72. return
  73. except:
  74. traceback.print_exc()
  75. driver=None
  76. return None
  77. def run_once(jsobj):
  78. table=db['rank_detection']
  79. print(jsobj)
  80. global driver
  81. kw=jsobj['kw']
  82. i=100
  83. if driver is None:
  84. re_get_webdriver()
  85. if driver is None:
  86. return
  87. driver.get('https://news.google.com/topstories?hl=zh-TW&gl=TW&ceid=TW:zh-Hant')
  88. time.sleep(5)
  89. elmt = driver.find_element(By.XPATH, "//input[contains(@aria-label,'搜尋')]")
  90. print(elmt)
  91. time.sleep(1)
  92. elmt.send_keys(kw)
  93. elmt.send_keys(Keys.ENTER)
  94. time.sleep(7)
  95. elmts = driver.find_elements(By.XPATH, "//main//div[@class='xrnccd']//article/a[contains(@href,'./articles/')]")
  96. print(len(elmts))
  97. elmt=elmts[-1]
  98. print(elmt)
  99. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  100. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  101. time.sleep(8)
  102. driver.quit()
  103. sys.exit()
  104. par1='tiny3'
  105. port='9942'
  106. os.system('killall chrome')
  107. #os.system('docker container restart proxy1')
  108. time.sleep(8)
  109. #os.system('curl --socks5 choozmo:choozmo9@172.104.93.163:41800 http://www.google.com')
  110. time.sleep(3)
  111. r=random.randint(0,1)
  112. r=2
  113. if r==0:
  114. prefix="site:hhh.com.tw "
  115. domain=['hhh.com.tw']
  116. positive=['北歐 風格 定義','','房 間隔 局','小 房間 設計','裝潢 費用','室內 裝修','設計 公司','透 天 裝潢','鄉村 風 裝潢']
  117. elif r==1:
  118. prefix="site:gs-rack.com "
  119. domain=['gs-rack.com']
  120. positive=['神助','移動模組','移動貨架','無軌道','創新設計','專利移動櫃','重型架','物流設備','客戶實績','儲存方案','客戶服務','平台移動','重型電動','分離式','重型移動','冷庫重型移動','防爆倉','專利移動模組','輕中型移動貨架','傳統後推','少量多樣','大量少樣','重型貨品']
  121. elif r==2:
  122. prefix="錢濤 "
  123. postfix=' site:ltn.com.tw'
  124. domain=['ltn.com.tw']
  125. positive=['對沖基金','俄烏戰爭','美國升息']
  126. kw=random.choice(positive)
  127. run_once({'domain':domain,'kw':prefix+kw+postfix})