jared 1 年之前
父节点
当前提交
e3cb41d457
共有 1 个文件被更改,包括 146 次插入0 次删除
  1. 146 0
      deployment/cel_single.py

+ 146 - 0
deployment/cel_single.py

@@ -0,0 +1,146 @@
+import time
+import traceback
+import dataset
+from selenium import webdriver
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.common.keys import Keys
+from celery import Celery
+import redis
+from celery.schedules import crontab
+import celery
+from celery.signals import celeryd_init
+from celery import signals
+import random
+from click import Option
+from selenium import webdriver
+import time
+
+
+
+app = Celery('tasks', backend ='redis://172.104.92.245', broker='redis://172.104.92.245')
+qname=None
+custname=None
+app.user_options['preload'].add(Option(('-Z', '--cust'),
+                                       default='default',
+                                       help='Configuration template to use.'))
+
+@signals.user_preload_options.connect
+def on_preload_parsed(options, **kwargs):
+    global custname
+#    if options.get_key('template')
+    print(options['cust'])
+    custname=options['cust']
+#    print(options)
+
+
+@celeryd_init.connect
+def configure_workers(sender=None, **kwargs):
+    global qname
+    worker_name = sender.split("@")[-1]
+    print("******")
+    print(worker_name)
+    qname=worker_name
+    print("******")
+    app.send_task(
+    'cel_seo_click.selenium_jared_click',
+    args=(),
+    queue=qname)
+
+
+#@app.on_after_configure.connect
+#def setup_periodic_tasks(sender, **kwargs):
+#    print(app.request.delivery_info['routing_key'])
+#    print(json_str)
+    #sender task.request.hostnam
+#    sender.add_periodic_task(100.0, selenium_jared_click.s(), name='add every 2 min')
+
+@app.task()
+def selenium_jared_click():
+    global custname
+
+    db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')
+    options = webdriver.ChromeOptions()
+    options.add_argument("--no-sandbox")
+    options.add_argument("--headless")
+    options.add_argument("--incognito")
+    #options.add_argument('--blink-settings=imagesEnabled=false')
+    PROXY=None
+
+    if PROXY is not None:
+        options.add_argument('--proxy-server=http://%s' % PROXY)
+
+
+    mobile_emulation = {
+        "deviceMetrics": { "width": 360, "height": 640, "pixelRatio": 3.0 },
+        "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19" }
+    try:
+
+
+        driver = webdriver.Chrome(options=options)
+    #    driver = webdriver.Chrome(ChromeDriverManager().install(),options=options)
+    ## Create Undetected Chromedriver with Options
+    #    driver = uc.Chrome(options=options)
+    #    options = uc.ChromeOptions()
+
+    except:
+        traceback.print_exc()
+
+    #kw='真理大學國際生'
+    #domain='cia.au.edu.tw'
+    if custname is None:
+        custname='真理'
+    cursor=db.query("SELECT cust,plan,prefix,domain,kw,positive FROM public.seo_jobs where cust='"+custname+"' order by random() limit 1")
+    kw=None
+    domain=None
+    for c in cursor:
+        cust=c['cust']
+        kw=c['kw']
+        plan=c['plan']
+        prefix=c['prefix']
+        domain=eval(c['domain'])[0]
+        positive=eval(c['positive'])
+        break
+    #kw='真理大學教堂'
+    #domain='udn.com'
+    print(kw)
+    print(domain)
+
+    #kw='真理大學校友'
+    #kw='真理大學國際生'
+    #kw='真理大學張聰聯'
+    #domain='au.edu.tw'
+    #domain='pronews.tw'
+
+    driver.get('https://www.google.com?num=100')
+    time.sleep(3)
+    print(driver.current_url)
+    elmt = driver.find_element(By.XPATH, "//textarea[@name='q']")
+    time.sleep(1)
+    elmt.send_keys(kw)
+    elmt.send_keys(Keys.ENTER)
+    time.sleep(4)
+
+    elmts = driver.find_elements(By.XPATH, "//a[@jsname='UWckNb']")
+    numresults=len(elmts)
+    print('搜尋結果數量',numresults)
+    if numresults<=0:
+        print(elmts)
+    for elmt in elmts:
+        href=elmt.get_attribute('href')
+        txt=elmt.text
+    #    print(txt)
+    #    print(href)
+        if domain in href:
+            webdriver.ActionChains(driver).move_to_element(elmt).perform()
+            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+            print(domain)
+            print(href)
+            driver.quit()
+            return txt
+            break
+    return '{empty}'
+#    time.sleep(5)
+
+