Pārlūkot izejas kodu

Merge branch 'master' of http://git.choozmo.com:3000/choozmo/kw_tools

Jared 2 gadi atpakaļ
vecāks
revīzija
adff2d2986
1 mainītis faili ar 133 papildinājumiem un 0 dzēšanām
  1. 133 0
      choozmo/sns_clickbot.py

+ 133 - 0
choozmo/sns_clickbot.py

@@ -0,0 +1,133 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import datetime
+import dataset
+import time
+import traceback
+import sys
+import fire
+#import pymysql
+#pymysql.install_as_MySQLdb()
+
+
+# Shared database handle and log table used by the functions below.
+# The connection URL can be overridden with the SEO_DB_URL environment
+# variable; without it the original built-in URL is used.
+# NOTE(review): the default URL embeds a username and password in source
+# control -- move it to configuration/secrets and rotate the password.
+db = dataset.connect(os.environ.get('SEO_DB_URL', 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'))
+table=db['seo_log']
+# Set by run_once(); None until a browser session has been created.
+driver = None
+
+
+def rua():
+    """Return a random desktop browser User-Agent string.
+
+    One entry is picked from a fixed pool of Firefox, Chrome and Opera
+    User-Agent strings on every call.
+    """
+    # Every entry must end with a comma: adjacent string literals are
+    # concatenated by Python, which previously fused the Chrome/87 and
+    # Chrome/60 entries into a single invalid User-Agent.
+    pool = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
+    ]
+    return random.choice(pool)
+
+
+# Location of the chromedriver binary used by restart_browser().
+path_z = '/Users/zooeytsai/Downloads/chromedriver 2'
+
+
+def restart_browser():
+    """Start a new headless Chrome session with a random User-Agent.
+
+    Returns:
+        A new ``webdriver.Chrome`` instance with a 950x6000 window. The
+        caller owns the session and is responsible for calling ``quit()``.
+    """
+    options = webdriver.ChromeOptions()
+    options.add_argument("user-agent=%s" % rua())
+    options.add_argument('--headless')
+    # Hand the driver path over through Service, the same way run_once does;
+    # the executable_path keyword is deprecated in Selenium 4.
+    browser = webdriver.Chrome(options=options, service=Service(path_z))
+    browser.set_window_size(950, 6000)
+    return browser
+
+
+def process_query(qs):
+    """Search Google for each stored term and click the first matching hit.
+
+    The terms are read from ``seo.sns`` for the hard-coded client. For every
+    term a new headless browser loads the Google result page. The first
+    result whose title is longer than 10 characters and whose URL appears in
+    the term list is written to the ``seo_log`` table and clicked.
+
+    Args:
+        qs: Sequence whose first item is the keyword stored in the ``kw``
+            column of each log row.
+    """
+    q = qs[0]
+    client = '班尼斯'
+    # Use the module-level connection instead of opening a second one with a
+    # duplicated copy of the connection URL.
+    cursor = db.query(f'select term from seo.sns where client="{client}"')
+    lst = [row['term'] for row in cursor]
+    # NOTE(review): result URLs are matched against the search terms
+    # themselves -- confirm that the `term` column is meant to hold URLs.
+    targets = set(lst)
+    for term in lst:
+        print(term)
+        browser = restart_browser()
+        try:
+            escaped_search_term = urllib.parse.quote(term)
+            googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100, 'zh-TW')
+            print(googleurl)
+            browser.get(googleurl)
+            # Fixed pause before reading the page; presumably gives the
+            # result list time to render.
+            time.sleep(10)
+
+            # find_elements(By.XPATH, ...) replaces the find_elements_by_xpath
+            # helper that Selenium 4 deprecated and later removed.
+            elmts = browser.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+            # idx counts every result link, so it is the 1-based position of
+            # the hit among all links found.
+            for idx, elmt in enumerate(elmts, 1):
+                href = elmt.get_attribute('href')
+                txt = elmt.text
+                if len(txt) > 10 and href in targets:
+                    print('clicked....')
+                    print(href)
+                    print(txt)
+                    print("ranking", idx)
+                    table.insert({'kw':q,'client':client,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
+                    webdriver.ActionChains(browser).move_to_element(elmt).perform()
+                    webdriver.ActionChains(browser).move_to_element(elmt).click().perform()
+                    time.sleep(5)
+                    break
+        finally:
+            # One browser is started per term; always shut it down so Chrome
+            # processes do not pile up, even when a step above raises.
+            browser.quit()
+
+def run_once(q):
+    """Create the module-level Chrome driver, run one query pass, then quit.
+
+    Args:
+        q: Tuple ``(kw, domain, port)``. ``q[2]`` is used as the port of the
+            Chrome debugger address on 127.0.0.1; the whole tuple is passed
+            on to ``process_query``.
+    """
+    global driver
+    s = Service('/root/driver/chromedriver')
+    user_agent = rua()
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    options.add_argument('--remote-debugging-port=9222')
+    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{q[2]}")
+    options.add_argument("--user-agent=" +user_agent)
+    options.add_argument("--incognito")
+
+    driver = webdriver.Chrome(options=options, service=s)
+    try:
+        driver.delete_all_cookies()
+        driver.set_window_size(1400,1000)
+
+        print('到此')
+        # NOTE(review): process_query starts its own browsers through
+        # restart_browser() and does not use this driver -- confirm that
+        # this session is still needed.
+        process_query(q)
+        time.sleep(3)
+    finally:
+        # Release the session even when the query pass raises.
+        driver.quit()
+
+
+#for c in lst:
+#while True:
+#    try:
+#        c=random.choice(lst)
+#    except:
+#        traceback.print_exc()
+#    sleepint=random.randint(320,520)
+#    time.sleep(sleepint)
+
+class JParams(object):
+    """Command object whose methods are exposed on the command line."""
+
+    def get(self, kw, domain, port):
+        """Print the keyword and run a single pass for it.
+
+        Args:
+            kw: Keyword to process.
+            domain: Forwarded unchanged as the second tuple item.
+            port: Forwarded unchanged as the third tuple item.
+        """
+        print('關鍵字', kw)
+        params = (kw, domain, port)
+        run_once(params)
+
+
+if __name__ == "__main__":
+    # Let fire build the command-line interface from the JParams methods.
+    fire.Fire(JParams)
+