cel_single.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. import time
  2. import traceback
  3. import dataset
  4. from selenium import webdriver
  5. from selenium.webdriver.support.ui import WebDriverWait
  6. from selenium.webdriver.common.by import By
  7. from selenium.webdriver.support import expected_conditions as EC
  8. from selenium.webdriver.common.keys import Keys
  9. from celery import Celery
  10. import redis
  11. from celery.schedules import crontab
  12. import celery
  13. from celery.signals import celeryd_init
  14. from celery import signals
  15. import random
  16. from click import Option
  17. from selenium import webdriver
  18. import time
  19. import sys
  # Celery application; the same Redis host is used as message broker and as
  # result backend.
  app = Celery(
      'tasks',
      broker='redis://172.104.92.245',
      backend='redis://172.104.92.245',
  )

  # Module-level state shared between the signal handlers and the task:
  #   custname - value of the ``--cust`` preload option (set in on_preload_parsed)
  #   qname    - queue name derived from the worker name (set in configure_workers)
  custname = None
  qname = None

  # Register an extra ``-Z`` / ``--cust`` command-line option among the
  # application's preload options.
  app.user_options['preload'].add(
      Option(
          ('-Z', '--cust'),
          default='default',
          help='Configuration template to use.',
      )
  )
  @signals.user_preload_options.connect
  def on_preload_parsed(options, **kwargs):
      """Store the ``cust`` preload option in the module-level ``custname``.

      Connected to the ``user_preload_options`` signal.

      Args:
          options: Mapping of parsed preload options; ``options['cust']`` is
              read, echoed to stdout and remembered.
          **kwargs: Remaining signal arguments (unused).
      """
      global custname
      chosen = options['cust']
      print(chosen)
      custname = chosen
  @celeryd_init.connect
  def configure_workers(sender=None, **kwargs):
      """Derive the queue name from ``sender`` and enqueue the first click task.

      Connected to the ``celeryd_init`` signal. The text after the last ``@``
      in ``sender`` is printed between two marker lines, stored in the
      module-level ``qname``, and used as the queue for one
      ``cel_single.selenium_jared_click3`` task.

      Args:
          sender: String identifying the worker; only the part after the
              last ``@`` is used.
          **kwargs: Remaining signal arguments (unused).
      """
      global qname
      marker = "******"
      host = sender.rpartition("@")[2]

      print(marker)
      print(host)
      qname = host
      print(marker)

      app.send_task('cel_single.selenium_jared_click3', args=(), queue=host)
  51. # app.conf.beat_schedule = {
  52. # # Executes every Monday morning at 7:30 a.m.
  53. # 'add-every-monday-morning': {
  54. # 'task': 'cel_single.selenium_jared_click3',
  55. # 'schedule': crontab(minute='*/1'),
  56. # 'schedule': 65.0,
  57. # 'args': (),
  58. # 'options':{'queue': qname}
  59. # },
  60. # }
  61. #@app.on_after_configure.connect
  62. #def setup_periodic_tasks(sender, **kwargs):
  63. # print(app.request.delivery_info['routing_key'])
  64. # print(json_str)
  65. #sender task.request.hostnam
  66. # sender.add_periodic_task(100.0, selenium_jared_click.s(), name='add every 2 min')
  def _pick_seo_job(db, cust):
      """Return ``(kw, domain)`` from one random ``seo_jobs`` row of *cust*.

      Returns ``None`` when the customer has no rows. The customer name is
      passed as a bound parameter instead of being concatenated into the SQL.
      """
      rows = db.query(
          "SELECT domain,kw FROM public.seo_jobs "
          "where cust=:cust order by random() limit 1",
          cust=cust,
      )
      row = next(iter(rows), None)
      if row is None:
          return None
      # SECURITY(review): the ``domain`` column is evaluated as Python source.
      # Anyone who can write to seo_jobs can execute code in this worker. If
      # the column only ever holds a list literal, switch to
      # ast.literal_eval (or store JSON) - confirm against the stored data.
      domain = eval(row['domain'])[0]
      return row['kw'], domain


  def _new_headless_chrome():
      """Start a headless, incognito Chrome; log and re-raise on failure."""
      opts = webdriver.ChromeOptions()
      for flag in ("--no-sandbox", "--headless", "--incognito"):
          opts.add_argument(flag)
      try:
          return webdriver.Chrome(options=opts)
      except Exception:
          # Keep the traceback on stdout, but do not continue without a
          # driver: every later step needs one.
          traceback.print_exc()
          raise


  def _search_and_click(driver, kw, domain, nav_timeout=10):
      """Search Google for *kw* and click the first result whose URL has *domain*.

      Returns the clicked link's text, or ``None`` when no result matched.
      """
      from selenium.common.exceptions import TimeoutException

      driver.get('https://www.google.com?num=100')
      time.sleep(3)
      print(driver.current_url)

      search_box = driver.find_element(By.XPATH, "//textarea[@name='q']")
      time.sleep(1)
      search_box.send_keys(kw)
      search_box.send_keys(Keys.ENTER)
      time.sleep(4)

      # NOTE(review): 'UWckNb' is presumably the jsname Google puts on result
      # anchors; it is not a stable API - verify when results come back empty.
      links = driver.find_elements(By.XPATH, "//a[@jsname='UWckNb']")
      print('搜尋結果數量', len(links))  # label means "number of search results"
      if not links:
          print(links)

      for link in links:
          href = link.get_attribute('href')
          # get_attribute may return None; skip such anchors instead of
          # raising on the ``in`` test.
          if not href or domain not in href:
              continue

          title = link.text
          results_url = driver.current_url
          # Hover first, then hover-and-click, as two separate action chains.
          webdriver.ActionChains(driver).move_to_element(link).perform()
          webdriver.ActionChains(driver).move_to_element(link).click().perform()
          print(domain)
          print(href)

          # The caller closes the browser right after this returns, so give
          # the navigation a bounded chance to start first. Best effort: a
          # timeout here is not an error.
          try:
              WebDriverWait(driver, nav_timeout).until(EC.url_changes(results_url))
          except TimeoutException:
              pass
          return title

      return None


  @app.task()
  def selenium_jared_click3():
      """Click one search result for a randomly chosen SEO job, then shut down.

      Steps:
        1. Default the module-level ``custname`` to '真理' when it is unset.
        2. Pick one random ``public.seo_jobs`` row for that customer and take
           its keyword and first domain.
        3. Open Google in headless Chrome, search for the keyword and click
           the first result whose URL contains the domain.
        4. Close the browser and broadcast ``shutdown`` through ``app.control``.

      Returns:
          The text of the clicked link, or ``'{empty}'`` when no result
          matched the domain.

      Raises:
          LookupError: No ``seo_jobs`` row exists for the customer.
          Exception: Whatever Selenium raises when Chrome cannot be started
              or the search page cannot be driven; no shutdown is broadcast
              in that case.
      """
      global custname
      if custname is None:
          custname = '真理'

      # NOTE(review): database credentials are hard-coded in the source;
      # consider loading them from configuration or the environment.
      db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')

      # Look the job up before launching a browser so a missing job does not
      # leave a Chrome process behind.
      job = _pick_seo_job(db, custname)
      if job is None:
          raise LookupError(f"no seo_jobs row found for cust={custname!r}")
      kw, domain = job
      print(kw)
      print(domain)

      driver = _new_headless_chrome()
      try:
          clicked_text = _search_and_click(driver, kw, domain)
      finally:
          # Always release the browser, including on errors.
          driver.quit()

      # NOTE(review): broadcast() is called without a destination, so this is
      # not limited to the current worker - confirm that is intended.
      app.control.broadcast('shutdown')
      return '{empty}' if clicked_text is None else clicked_text
  147. # time.sleep(5)