Jared 2 éve
szülő
commit
17ae04a18b
1 módosított fájl, 185 hozzáadás és 0 törlés
  1. 185 0
      customers/click_hhh.py

+ 185 - 0
customers/click_hhh.py

@@ -0,0 +1,185 @@
+#import redis
+import time
+import traceback
+#import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import urllib
+import os
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+from selenium.webdriver.common.keys import Keys
+import json
+import random
+import time
+import redis
+import sys
+import codecs
+import random
+import os
+import time
+import requests
+# Shared WebDriver session; created and replaced by re_get_webdriver().
+driver=None
+
+# NOTE(review): the fallback URL below embeds database credentials in source
+# control. Set SEO_DATABASE_URL in the environment to override it; the
+# fallback is kept only so existing deployments keep working unchanged.
+db = dataset.connect(os.environ.get(
+    'SEO_DATABASE_URL',
+    'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'))
+def re_get_webdriver():
+    """Close any existing WebDriver session and open a new remote one.
+
+    Connects to ``http://127.0.0.1:<portnum>/wd/hub`` (``portnum`` is a
+    module-level global) using Chrome options ``--no-sandbox``,
+    ``--headless`` and ``--incognito``. The new session is also stored in
+    the module-level global ``driver``.
+
+    Returns:
+        The new remote WebDriver, or ``None`` if the session could not be
+        created (the traceback is printed). Callers are expected to retry.
+    """
+    global driver
+    if driver is not None:
+        print('closing....')
+        try:
+            driver.quit()
+        except Exception:
+            # A dead or already-closed session must not stop us from
+            # opening a replacement.
+            traceback.print_exc()
+        else:
+            print('quit....')
+        driver = None
+    try:
+        options = webdriver.ChromeOptions()
+        options.add_argument("--no-sandbox")
+        options.add_argument("--headless")
+        options.add_argument("--incognito")
+        driver = webdriver.Remote(
+            command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
+            options=options)
+    except Exception:
+        # Catch Exception rather than using a bare except so that
+        # KeyboardInterrupt / SystemExit still propagate and can break the
+        # caller's retry loop.
+        traceback.print_exc()
+        driver = None
+        return None
+    return driver
+
+
+def run_once(jsobj):
+    """Search Google for a keyword and click one of the result links.
+
+    Args:
+        jsobj: dict with key ``'kw'`` (the search phrase) and either
+            ``'domain'`` (an iterable of substrings; the first result whose
+            href contains any of them is clicked) or, when ``'domain'`` is
+            missing/None, ``'exclude'`` (a substring; a random result whose
+            href does not contain it is clicked).
+
+    Side effects:
+        In domain mode a matching result is recorded in the
+        ``rank_detection`` table with its 1-based position among the result
+        links. When no domain match was clicked, a row with ranking -1 is
+        inserted instead. The WebDriver session is always closed on exit.
+
+    Raises:
+        KeyError: if ``jsobj`` has no ``'kw'`` key.
+        SystemExit: if the results page contains no result links.
+    """
+    table = db['rank_detection']
+    print(jsobj)
+    kw = jsobj['kw']
+
+    # Keep polling until a WebDriver session can be created.
+    while True:
+        driver = re_get_webdriver()
+        if driver is not None:
+            break
+        time.sleep(3)
+    try:
+        if jsobj.get('domain') is None:
+            exclude = jsobj['exclude']
+            domain = None
+        else:
+            domain = jsobj['domain']
+            exclude = None
+        driver.get('https://www.google.com?num=100')
+        time.sleep(17)
+        # Probe the session; if it is unusable, rebuild it and reload.
+        while True:
+            try:
+                print(driver.current_url)
+                break
+            except Exception:
+                traceback.print_exc()
+                driver = re_get_webdriver()
+                time.sleep(3)
+                driver.get('https://www.google.com?num=100')
+                time.sleep(3)
+
+            time.sleep(3)
+
+        elmt = driver.find_element(By.XPATH, "//input[@name='q']")
+        time.sleep(1)
+        elmt.send_keys(kw)
+        elmt.send_keys(Keys.ENTER)
+        time.sleep(6)
+
+        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+
+        numresults = len(elmts)
+        print('搜尋結果數量',numresults)
+        if numresults == 0:
+            print(driver.current_url)
+            print(driver.title)
+            # NOTE(review): send_msg is not defined anywhere in this file, so
+            # as written this line raises NameError (caught below) and the
+            # sys.exit() is never reached -- confirm where it should come from.
+            send_msg('stop working...')
+            sys.exit()
+
+        found = False
+        test_lst = []
+        for idx, elmt in enumerate(elmts, start=1):
+            # get_attribute may return None; normalise so the substring
+            # checks below cannot raise TypeError.
+            href = elmt.get_attribute('href') or ''
+            txt = elmt.text
+            if len(txt) <= 10:
+                continue
+            if domain is not None:
+                if any(d in href for d in domain):
+                    print('found....')
+                    print('clicked....')
+                    print(href)
+                    print(txt)
+                    print("ranking", idx)
+                    found = True
+
+                    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                    table.insert({'ranking':idx,'kw':kw,'results':numresults,'url':href,'title':txt})
+
+                    time.sleep(6)
+                    # The finally clause below closes the session.
+                    return
+            elif exclude not in href:
+                test_lst.append(elmt)
+
+        if exclude is not None:
+            print('exclude')
+            if test_lst:
+                elmt = random.choice(test_lst)
+                print(elmt)
+
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                time.sleep(5)
+            # An empty candidate list simply means there is nothing to click.
+
+        if not found:
+            table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄'})
+
+    except Exception:
+        # Best-effort run: report the failure and fall through to cleanup.
+        # SystemExit / KeyboardInterrupt are deliberately not caught here.
+        print('exception')
+        traceback.print_exc()
+    finally:
+        # Always release the session, including on the early return above;
+        # driver can be None if a mid-run reconnect failed.
+        if driver is not None:
+            try:
+                driver.quit()
+            except Exception:
+                traceback.print_exc()
+# ---- script entry: pick a keyword, start a Selenium container, run once ----
+time.sleep(5)
+
+# r is 0, 1 or 2: values 0 and 1 select the first keyword set, 2 the second.
+r = random.randint(0, 2)
+postfix = ' site:hhh.com.tw'
+domain = ['hhh.com.tw']
+if r in (0, 1):
+    prefix = "幸福空間 "
+    positive = [
+        '', '設計', '設計師', '室內 設計', '裝潢', '室內 裝修',
+        '設計 公司', '裝潢', '北歐風',
+    ]
+else:
+    prefix = ""
+    positive = [
+        '艾立思', '', '艾立思軟裝', '艾立思集團', '御見設計',
+        '艾立思 家具訂製', '艾立思 精品家具', '艾立思 軟裝飾品', 'ELIZ',
+        '艾立思 郭柏君', '艾立思 家配師', '艾立思 郭柏君',
+    ]
+
+
+# Random host port that gets mapped onto the container's port 4444.
+portnum = random.randint(4444, 4555)
+print(portnum)
+
+# Remove the container named p4444 from a previous run, pausing after each step.
+for _docker_cmd in ('docker container stop p4444',
+                    'docker container rm p4444'):
+    os.system(_docker_cmd)
+    time.sleep(1)
+
+_run_cmd = ('docker run -d -p ' + str(portnum)
+            + ':4444 --name p4444 --dns 168.95.1.1 selenium/standalone-chrome:103.0')
+os.system(_run_cmd)
+
+# Fixed pause before the first connection attempt -- presumably to let the
+# container finish starting; confirm the intended duration.
+time.sleep(18)
+kw = random.choice(positive)
+_job = {'domain': domain, 'kw': prefix + " " + kw}
+run_once(_job)
+
+