cel_seo_click.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. import time
  2. import traceback
  3. import dataset
  4. from selenium import webdriver
  5. from selenium.webdriver.support.ui import WebDriverWait
  6. from selenium.webdriver.common.by import By
  7. from selenium.webdriver.support import expected_conditions as EC
  8. from selenium.webdriver.common.keys import Keys
  9. from celery import Celery
  10. import redis
  11. from celery.schedules import crontab
  12. import celery
  13. from billiard import current_process
  # Redis is used as both the Celery message broker and the result backend.
  _REDIS_URL = 'redis://172.104.92.245'
  app = Celery('tasks', backend=_REDIS_URL, broker=_REDIS_URL)
  # rkey = app.request.delivery_info['routing_key']   (disabled)

  # Start-up debug output, framed by two marker lines.
  print("******", app.select_queues, "******", sep="\n")

  # Periodic job definition.  Despite the entry name below, the numeric
  # schedule means "every 30 seconds"; the job is routed to the 'xxx' queue
  # and takes no positional arguments.
  # (A crontab(minute='*/1') schedule is kept here as a disabled alternative.)
  _CLICK_JOB = dict(
      task='cel_seo_click.selenium_jared_click',
      schedule=30.0,
      args=(),
      options=dict(queue='xxx'),
  )
  app.conf.beat_schedule = {'add-every-monday-morning': _CLICK_JOB}
  # Disabled alternative: register the periodic task from a signal handler.
  #@app.on_after_configure.connect
  #def setup_periodic_tasks(sender, **kwargs):
  #    print(app.request.delivery_info['routing_key'])
  #    print(json_str)
  #    # sender task.request.hostname
  #    sender.add_periodic_task(100.0, selenium_jared_click.s(), name='add every 2 min')
  def _pick_random_job(db):
      """Return ``(kw, domain)`` for one random job row, or ``(None, None)``.

      ``db`` is the handle returned by ``dataset.connect``.  The ``domain``
      column is stored as the text of a Python list literal; only its first
      entry is used.
      """
      import ast  # local import keeps this block self-contained

      rows = db.query("SELECT cust,plan,prefix,domain,kw,positive FROM public.seo_jobs where cust='真理' order by random() limit 1")
      row = next(iter(rows), None)
      if row is None:
          return None, None
      # SECURITY: this value used to go through eval(), which would execute
      # arbitrary code stored in the table.  literal_eval only accepts plain
      # literals (lists, strings, numbers, ...).
      domains = ast.literal_eval(row['domain'])
      return row['kw'], domains[0]


  def _search_and_click(driver, kw, domain):
      """Search Google for ``kw`` and click the first result linking to ``domain``.

      Returns the visible text of the clicked link, or ``'{empty}'`` when no
      result's href contains ``domain``.  The caller owns ``driver`` and is
      responsible for quitting it.
      """
      driver.get('https://www.google.com?num=100')
      time.sleep(3)
      print(driver.current_url)
      search_box = driver.find_element(By.XPATH, "//textarea[@name='q']")
      time.sleep(1)
      search_box.send_keys(kw)
      search_box.send_keys(Keys.ENTER)
      time.sleep(4)

      links = driver.find_elements(By.XPATH, "//a[@jsname='UWckNb']")
      print('搜尋結果數量', len(links))
      if not links:
          print(links)

      for link in links:
          href = link.get_attribute('href')
          # get_attribute may return None; skip such anchors instead of
          # failing on the substring test.
          if not href or domain not in href:
              continue
          # Read the text before clicking: the click navigates away and the
          # element reference may no longer be usable afterwards.
          txt = link.text
          # Hover first, then hover-and-click, as two separate action chains.
          webdriver.ActionChains(driver).move_to_element(link).perform()
          webdriver.ActionChains(driver).move_to_element(link).click().perform()
          print(domain)
          print(href)
          return txt
      return '{empty}'


  @app.task()
  def selenium_jared_click():
      """Pick a random SEO job, search its keyword on Google and click the target.

      One row is read from ``public.seo_jobs``; its keyword is searched with a
      headless, incognito Chrome, and the first result whose href contains the
      row's first domain is clicked.

      Returns:
          The visible text of the clicked result link, or ``'{empty}'`` when
          there is no usable job row or no matching result.

      Raises:
          Whatever ``webdriver.Chrome`` raises when the browser cannot be
          started (the traceback is printed first).  Previously that error was
          swallowed and the task failed later with ``NameError``.
      """
      print('*****')
      p = current_process()
      # Debug output of the worker host part.
      # NOTE(review): assumes a pool worker whose initargs[1] looks like
      # 'name@host' -- TODO confirm for other pool types.
      print(p.initargs[1].split('@')[1])
      print('*****')

      # NOTE(review): credentials are hard-coded in this URL and the connection
      # is never closed explicitly; consider moving both to configuration.
      db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')
      kw, domain = _pick_random_job(db)
      print(kw)
      print(domain)
      if kw is None or domain is None:
          # Nothing to search for: do not start a browser at all.
          return '{empty}'

      options = webdriver.ChromeOptions()
      options.add_argument("--no-sandbox")
      options.add_argument("--headless")
      options.add_argument("--incognito")
      # NOTE(review): a mobile-emulation profile (360x640, Android UA) used to
      # be built here but was never applied to `options`; it was removed as
      # dead code.

      try:
          driver = webdriver.Chrome(options=options)
      except Exception:
          traceback.print_exc()
          raise

      try:
          return _search_and_click(driver, kw, domain)
      finally:
          # Always release the browser, including on the no-match path and on
          # errors, so Chrome processes do not pile up between runs.
          driver.quit()