Forráskód Böngészése

general_clickbot_3

zooeytsai 2 éve
szülő
commit
00134e65ac
1 módosított fájl, 133 hozzáadás és 0 törlés
  1. 133 0
      INNNews/general_clickbot_3.py

+ 133 - 0
INNNews/general_clickbot_3.py

@@ -0,0 +1,133 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import datetime
+import dataset
+import time
+import traceback
+import sys
+import fire
+import redis
+
+driver = None  # Shared WebDriver handle: assigned in run_once(), then used and quit in process_query().
+
+
+def process_query(qs):
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    table = db['general_log']
+    q = qs[0]
+    domain = qs[1]
+    global driver
+    googleurl = 'https://www.google.com/?num=100'
+    driver.get(googleurl)
+    time.sleep(6)
+    send_kw_elmt = WebDriverWait(driver, 10).until(EC.presence_of_element_located(
+        (By.XPATH, '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')))
+    send_kw_elmt.send_keys(q)
+    time.sleep(3)
+    send_kw_elmt.send_keys(Keys.ENTER)
+    time.sleep(6)
+    print(driver.current_url)
+    elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+    
+    idx = 1
+    ranking = -1
+    print('網頁數量', len(elmts))
+    #    driver.save_screenshot('c:/tmp/test.png')
+    if 'site' in q:
+        href = elmts[0].get_attribute('href')
+        txt = elmts[0].text
+        print('clicked....')
+        print(href)
+        print(txt)
+        print("ranking", idx)
+        table.insert(
+            {'kw': q, 'domain': domain, 'ranking': idx, 'title': txt, 'url': href, 'dt': datetime.datetime.now()})
+        webdriver.ActionChains(driver).move_to_element(elmts[0]).perform()
+        time.sleep(3)
+        webdriver.ActionChains(driver).move_to_element(elmts[0]).click().perform()
+        time.sleep(5)
+    else:
+        for elmt in elmts:
+            href = elmt.get_attribute('href')
+            txt = elmt.text
+            if len(txt) > 10:
+                if domain in href:
+                    print('clicked....')
+                    print('點擊網址', href)
+                    print('標題', txt)
+                    print("ranking", idx)
+                    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                    time.sleep(3)
+                    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                    table.insert({'kw': q, 'domain': domain, 'ranking': idx, 'title': txt, 'url': href,'dt': datetime.datetime.now()})
+                    time.sleep(5)
+                    break
+            idx += 1
+    db.close()
+    print('資料庫關閉')
+    driver.quit()
+
+
+def run_once(q):
+    global driver
+    s = Service('/root/driver/chromedriver')
+    options = webdriver.ChromeOptions()
+    options.add_argument("--no-sandbox")
+    options.add_argument("--disable-dev-shm-usage")
+    options.add_argument('--headless')
+    # options.add_argument('--remote-debugging-port=9222')
+    # options.add_experimental_option("debuggerAddress", f"127.0.0.1:{q[2]}")
+    # options.add_argument("--user-agent=" +user_agent)
+    options.add_argument("--incognito")
+    r = redis.Redis(host='db.ptt.cx', port=6379, db=2, password='choozmo9')
+    data = r.get('google_proxy')
+    jstext = data.decode('utf-8')
+    jsobj = json.loads(jstext)
+    # print('Free proxy',jsobj)
+    proxy = random.choice(jsobj)
+    i5 = "--proxy-server=socks5://172.104.93.163:41800"
+    change_ip_list = ['--proxy-server=%s' % proxy, "--proxy-server=socks5://127.0.0.1:9050",
+                      "--proxy-server=socks5://192.53.174.202:8180"]
+    change_ip = random.choice(change_ip_list)
+    options.add_argument(i5)
+    # print('使用代理ip',change_ip)
+    
+    driver = webdriver.Chrome(options=options, service=s)
+    
+    driver.delete_all_cookies()
+    driver.set_window_size(1400, 1000)
+    
+    process_query(q)
+
+
+# for c in lst:
+# while True:
+#    try:
+#        c=random.choice(lst)
+#    except:
+#        traceback.print_exc()
+#    sleepint=random.randint(320,520)
+#    time.sleep(sleepint)
+
+class JParams(object):
+    
+    def get(self, kw, domain):
+        run_once((kw, domain))
+
+
+if __name__ == '__main__':  # Only when executed as a script, not on import.
+    fire.Fire(JParams)  # Hand the JParams class to python-fire to build the command-line interface.
+