jared 1 rok pred
rodič
commit
98926a7bc6
1 zmenil súbory, kde vykonal 329 pridanie a 0 odobranie
  1. 329 0
      deployment/click_twice.py

+ 329 - 0
deployment/click_twice.py

@@ -0,0 +1,329 @@
+#import redis
+import time
+import traceback
+#import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+#import urllib
+import os
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+from selenium.webdriver.common.keys import Keys
+import json
+import random
+import time
+#import redis
+import sys
+import codecs
+import random
+import os
+import time
+import requests
+#JNOTE: CLICK TWICE
+
+driver=None
+dockername='p4444'
+
+is_docker=True
+#is_docker=False
+db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')
+url_white=[]
+cursor=db.query("select url from seo_whitelist where cust='啟翔'")
+for c in cursor:
+    url_white.append(c['url'])
+print(url_white)
+#sys.exit()
+#db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+def scrolling(driver,pgnum):
+    ub = driver.find_element_by_css_selector('body')
+    for i in range(pgnum):
+        ub.send_keys(Keys.PAGE_DOWN)
+        if pgnum>1:
+            time.sleep(0.3)
+
+
+
+if is_docker:
+    portnum=random.randint(4444,4555)
+    print(portnum)
+    os.system('docker container stop '+dockername)
+    time.sleep(0.5)
+    os.system('docker container rm '+dockername)
+    time.sleep(0.5)
+    os.system('docker run -d -p '+str(portnum)+':4444 --shm-size=2g --name '+dockername+' --dns 168.95.1.1 selenium/standalone-chrome:103.0')
+    time.sleep(7)
+
+
+def re_get_webdriver():
+    global port
+    global driver
+    global portnum
+    global is_docker
+    result=[]
+    if driver is not None:
+        print('closing....')
+        driver.quit()
+        print('quit....')
+        driver=None
+    try:
+        options = webdriver.ChromeOptions()
+        options.add_argument("--no-sandbox")
+        options.add_argument("--headless")
+        options.add_argument("--incognito")
+#        options.add_argument('--proxy-server=socks5://172.104.92.245:14900')
+
+        mobile_emulation = {
+            "deviceMetrics": { "width": 360, "height": 640, "pixelRatio": 3.0 },
+            "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19" }
+#        options.add_experimental_option("mobileEmulation", mobile_emulation)
+
+        if is_docker:
+            try:
+                driver = webdriver.Remote(
+                    command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
+                options=options)
+            except:
+                traceback.print_exc()
+                time.sleep(9999)
+                return None
+            return driver
+
+
+
+        try:
+            driver = webdriver.Chrome(options=options)
+
+        except:
+            traceback.print_exc()
+            return None
+        return driver
+    except:
+        traceback.print_exc()
+        driver=None
+        return None
+    return driver
+
+
+def click_allowed():
+    global driver
+    global url_white
+    elmts = driver.find_elements(By.XPATH, "//a")
+    elmt_lst=[]
+    for elmt in elmts:
+        elmt_lst.append(elmt)
+    random.shuffle(elmt_lst)
+    for elmt in elmt_lst:
+        href=elmt.get_attribute('href')
+        for url in url_white:
+            if url in href:
+                print(href)
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                time.sleep(5)
+                return
+
+
+def run_once(jsobj):
+
+    table=db['seo_jobs_ranking']
+#    history=db['seo_search_history']
+
+    print(jsobj)
+    kw=jsobj['kw']
+
+    i=100
+    while True:
+        driver=re_get_webdriver()
+        print('re_get_webdriver')
+        if driver is not None:
+            break
+        time.sleep(3)
+    try:
+        kw=jsobj['kw']
+        if jsobj.get('domain') is None:
+            exclude=jsobj['exclude']
+            domain=None
+        else:
+            domain=jsobj['domain']
+            exclude=None
+        driver.get('https://www.google.com?num=100')
+        time.sleep(1)
+        while True:
+            try:
+                print(driver.current_url)
+                break
+            except:
+                traceback.print_exc()
+                driver=re_get_webdriver()
+                time.sleep(3)
+                driver.get('https://www.google.com?num=100')
+#                time.sleep(3)
+
+            time.sleep(3)
+
+#        time.sleep(9999)
+#        elmt = driver.find_element(By.XPATH, "//input[@name='q']")
+        elmt = driver.find_element(By.XPATH, "//textarea[@name='q']")
+
+#        elmt = driver.find_element(By.XPATH, "//textarea[@type='search']")
+
+        time.sleep(1)
+        elmt.send_keys(kw)
+        elmt.send_keys(Keys.ENTER)
+        time.sleep(3)
+
+#        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+        elmts = driver.find_elements(By.XPATH, "//a[@jsname='UWckNb']")
+
+
+        numresults=len(elmts)
+        
+        print('搜尋結果數量',numresults)
+        if numresults==0:
+            print(driver.current_url)
+            print(driver.title)
+            sys.exit()
+#        time.sleep(9999)
+
+        idx=1
+        found=False
+        test_lst=[]
+        clickelmt=None
+        neg_count=0
+        neg_total=0
+        clickidx=0
+        clickhref=''
+        clicktitle=''
+        for elmt in elmts:
+            href=elmt.get_attribute('href')
+            txt=elmt.text
+#            history.insert({'ranking':idx,'kw':kw,'results':numresults,'url':href,'title':txt})
+            if '坑殺' in txt or '侵占' in txt or '判決書' in txt or '強佔' in txt or '掏空' in txt or '送達公告' in txt or '違反勞動'in txt:
+                neg_count+=1
+                neg_total+=idx
+            if len(txt)>10:
+                if domain is not None:
+                    random.shuffle(domain)
+                    for d in domain:
+                        if d in href:
+                            print('found....')
+                            print('clicked....')
+                            print(href)
+                            print(txt)
+                            print("ranking", idx)
+                            found=True
+                            clickelmt=elmt
+                            clickidx=idx
+                            clickhref=href
+                            clicktitle=txt
+
+                else:
+                    if exclude not in href:
+                        test_lst.append(elmt)
+
+
+
+                    
+            idx+=1
+        if exclude is not None:
+            print('exclude')
+            elmt=random.choice(test_lst)
+            print(elmt)
+
+            webdriver.ActionChains(driver).move_to_element(elmt).perform()
+            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+            scrolling(driver,3)
+            time.sleep(5)
+        if neg_count ==0:
+            negstr='0'
+        else:
+            negstr=str(neg_total/neg_count)
+        print(' negative: ' +negstr)
+        if not found:
+            True
+            table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄'})
+        else:
+            webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
+            webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
+            print('clicked...')
+            table.insert({'ranking':clickidx,'kw':kw,'results':numresults,'url':clickhref,'title':clicktitle,'avg_neg':negstr})
+            scrolling(driver,3)
+            time.sleep(6)
+            print('sleep 6')
+            click_allowed()
+
+
+
+
+            return
+
+
+    except:
+        traceback.print_exc()
+
+        print('exception')
+        traceback.print_exc()
+
+    driver.quit()
+time.sleep(5)
+
+#r=random.randint(0,7)
+#r=987
+#JNOTE: 關鍵字點擊
+related=''
+
+#cursor=db.query('SELECT cust,plan,prefix,domain,kw,positive FROM public.seo_jobs order by random() limit 1')
+cursor=db.query("SELECT cust,plan,prefix,domain,kw,positive FROM public.seo_jobs where cust='啟翔' order by random() limit 1")
+
+for c in cursor:
+    cust=c['cust']
+    kw=c['kw']
+    plan=c['plan']
+    prefix=c['prefix']
+    domain=eval(c['domain'])
+    positive=eval(c['positive'])
+    break
+r=9999
+
+if r==11:
+    cust='啟翔'
+    plan='形象SEO'
+    postfix=''
+    domain=['65c248e0fd89780001035964']
+    kw='啟翔輕金屬工序'
+    prefix=''
+    positive=['']
+
+
+
+#    positive=['集仕多']
+#    positive=['集仕多 AIGV']
+#    positive=['集仕多 三立']
+
+#    positive=['台北室內設計公司排名']
+#    positive=[related]
+#    positive=['半 日照 植物 推薦']
+#    positive=['3 坪 多大']  
+#    positive=['鞋櫃']
+#    positive=['裝修屋子']
+#    positive=['']
+#    kw='幸福空間'
+#    kw='輕裝修'
+#    kw='輕裝修'
+
+
+#朱英凱
+#琢隱設計
+
+#os.system('curl --socks5 choozmo:choozmo9@172.104.92.245:14900 http://www.google.com')
+
+#newkw=prefix+" "+kw+' '+random.choice(positive)
+newkw=kw
+print(newkw)
+#newkw=kw
+run_once({'domain':domain,'kw':newkw})
+