jared il y a 1 an
Parent
commit
05a3094937
1 fichier modifié avec 277 ajouts et 0 suppression
  1. 277 0
      deployment/pool_bad_clicks.py

+ 277 - 0
deployment/pool_bad_clicks.py

@@ -0,0 +1,277 @@
+#import redis
+import time
+import traceback
+#import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+#import urllib
+import os
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+from selenium.webdriver.common.keys import Keys
+import json
+import random
+import time
+#import redis
+import sys
+import codecs
+import random
+import os
+import time
+import requests
+driver=None
+dockername='p4444'
+
+#is_docker=True
+is_docker=False
+db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')
+
+if is_docker:
+    portnum=random.randint(4444,4555)
+    print(portnum)
+    os.system('docker container stop '+dockername)
+    time.sleep(0.5)
+    os.system('docker container rm '+dockername)
+    time.sleep(0.5)
+    os.system('docker run -d -p '+str(portnum)+':4444 --shm-size=2g --name '+dockername+' --dns 168.95.1.1 selenium/standalone-chrome:103.0')
+    time.sleep(7)
+
+
+def re_get_webdriver():
+    global port
+    global driver
+    global portnum
+    global is_docker
+    result=[]
+    if driver is not None:
+        print('closing....')
+        driver.quit()
+        print('quit....')
+        driver=None
+    try:
+        options = webdriver.ChromeOptions()
+        options.add_argument("--no-sandbox")
+        options.add_argument("--headless")
+        options.add_argument("--incognito")
+#        options.add_argument('--proxy-server=socks5://172.104.92.245:14900')
+
+        mobile_emulation = {
+            "deviceMetrics": { "width": 360, "height": 640, "pixelRatio": 3.0 },
+            "userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19" }
+#        options.add_experimental_option("mobileEmulation", mobile_emulation)
+
+        if is_docker:
+            try:
+                driver = webdriver.Remote(
+                    command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
+                options=options)
+            except:
+                traceback.print_exc()
+                time.sleep(9999)
+                return None
+            return driver
+
+
+
+        try:
+            driver = webdriver.Chrome(options=options)
+
+        except:
+            traceback.print_exc()
+            return None
+        return driver
+    except:
+        traceback.print_exc()
+        driver=None
+        return None
+    return driver
+
+
+
+def run_once(jsobj):
+    global db
+    table=db['bad_clicks']
+    print(jsobj)
+    kw=jsobj['kw']
+
+    i=100
+    while True:
+        driver=re_get_webdriver()
+        print('re_get_webdriver')
+        if driver is not None:
+            break
+        time.sleep(3)
+    try:
+        kw=jsobj['kw']
+        if jsobj.get('domain') is None:
+            exclude=jsobj['exclude']
+            domain=None
+        else:
+            domain=jsobj['domain']
+            exclude=None
+        driver.get('https://www.google.com?num=100')
+        time.sleep(1)
+        while True:
+            try:
+                print(driver.current_url)
+                break
+            except:
+                traceback.print_exc()
+                driver=re_get_webdriver()
+                time.sleep(3)
+                driver.get('https://www.google.com?num=100')
+#                time.sleep(3)
+
+            time.sleep(3)
+        elmt = driver.find_element(By.XPATH, "//textarea[@type='search']")
+
+        time.sleep(1)
+        elmt.send_keys(kw)
+        elmt.send_keys(Keys.ENTER)
+        time.sleep(3)
+
+#        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+        elmts = driver.find_elements(By.XPATH, "//a[@jsname='UWckNb']")
+
+
+        numresults=len(elmts)
+        
+        print('搜尋結果數量',numresults)
+        if numresults==0:
+            print(driver.current_url)
+            print(driver.title)
+            sys.exit()
+#        time.sleep(9999)
+
+        idx=1
+        found=False
+        test_lst=[]
+        clickelmt=None
+        neg_count=0
+        neg_total=0
+        clickidx=0
+        clickhref=''
+        clicktitle=''
+        for elmt in elmts:
+            href=elmt.get_attribute('href')
+            txt=elmt.text
+            if '坑殺' in txt or '侵占' in txt or '判決書' in txt or '強佔' in txt or '掏空' in txt or '送達公告' in txt or '違反勞動'in txt:
+                neg_count+=1
+                neg_total+=idx
+            if len(txt)>10:
+                if domain is not None:
+                    random.shuffle(domain)
+                    for d in domain:
+                        if d in href:
+                            print('found....')
+                            print('clicked....')
+                            print(href)
+                            print(txt)
+                            print("ranking", idx)
+                            found=True
+                            clickelmt=elmt
+                            clickidx=idx
+                            clickhref=href
+                            clicktitle=txt
+
+                else:
+                    if exclude not in href:
+                        test_lst.append(elmt)
+
+
+
+                    
+            idx+=1
+        if exclude is not None:
+            print('exclude')
+            elmt=random.choice(test_lst)
+            print(elmt)
+
+            webdriver.ActionChains(driver).move_to_element(elmt).perform()
+            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+            time.sleep(3)
+        if neg_count ==0:
+            negstr='0'
+        else:
+            negstr=str(neg_total/neg_count)
+        print(' negative: ' +negstr)
+        if not found:
+            True
+            table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄'})
+
+        else:
+            webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
+            webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
+            print('clicked...')
+            table.insert({'ranking':clickidx,'kw':kw,'results':numresults,'url':clickhref,'title':clicktitle,'avg_neg':negstr})
+            time.sleep(80)
+            print('sleep 6')
+            return
+
+
+    except:
+        traceback.print_exc()
+
+        print('exception')
+        traceback.print_exc()
+
+    driver.quit()
+time.sleep(5)
+
+r=random.randint(0,24)
+print(r)
+#r=987
+#r=
+#JNOTE: 關鍵字點擊
+related=''
+r=5
+#r=6
+#聲明
+
+entries=[]
+
+entry={'cust':'啟翔',
+       'plan':'文章',
+       'prefix':'',
+       'kw':'1111 4970-808',
+       'postfix':'',
+       'domain':['Comp_Info.aspx?vNo=48173'],
+       'positive':['']}
+entries.append(entry)
+entry={'cust':'啟翔',
+       'plan':'文章',
+       'prefix':'',
+       'kw':'作業員 4970808',
+       'postfix':'',
+       'domain':['job/3uhrh'],
+       'positive':['']}
+entry={'cust':'啟翔',
+       'plan':'文章',
+       'prefix':'',
+       'kw':'facebook 100078739363391 啟翔',
+       'postfix':'',
+       'domain':['100078739363391'],
+       'positive':['']}
+entries.append(entry)
+
+
+
+entries.append(entry)
+
+entry=random.choice(entries)
+
+
+#朱英凱
+#琢隱設計
+
+#os.system('curl --socks5 choozmo:choozmo9@172.104.92.245:14900 http://www.google.com')
+
+newkw=entry['prefix']+" "+entry['kw']+' '+random.choice(entry['positive'])
+print(newkw)
+#newkw=kw
+run_once({'domain':entry['domain'],'kw':newkw})
+
+