Jared преди 3 години
родител
ревизия
b382ee2f6b
променен е 1 файл, в който са добавени 119 реда и са изтрити 0 реда
  1. 119 0
      INNNews/local_general_clickbot.py

+ 119 - 0
INNNews/local_general_clickbot.py

@@ -0,0 +1,119 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import datetime
+import dataset
+import time
+import traceback
+import sys
+import fire
+
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+table=db['general_log']
+
+
+  
+driver = None
+
+
+
+def empty_query(q):
+    global driver
+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
+    driver.get(googleurl)
+    time.sleep(3)
+
+
+def process_query(qs):
+    q=qs[0]
+    domain=qs[1]
+    global driver
+    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100,'zh-TW')
+    print(googleurl)
+    driver.get(googleurl)
+    time.sleep(6)
+
+    elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
+
+    idx=1
+    ranking=-1
+    print(len(elmts))
+#    driver.save_screenshot('c:/tmp/test.png')
+
+    for elmt in elmts:
+
+        href=elmt.get_attribute('href')
+        txt=elmt.text
+        if len(txt)>10:
+            if domain in href:
+                print('clicked....')
+                print(href)
+                print(txt)
+                print("ranking", idx)
+                table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
+                webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                break
+        idx+=1
+
+def run_once(q):
+    global driver
+    result=[]
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+#    options.add_argument('--remote-debugging-port=9922')
+    options.add_argument('--remote-debugging-port='+str(q[2]))
+
+#    options.add_experimental_option("debuggerAddress", "127.00:9922")
+    # options.add_argument("--user-agent=" +user_agent)
+    options.add_argument("--incognito")
+#    driver = webdriver.Chrome(executable_path=r'C:\portable\webdriver\chrome98\chromedriver.exe',options=options)
+    if os.name=='nt':
+        driver = webdriver.Chrome(executable_path=r'C:\portable\webdriver\chrome98\chromedriver.exe',options=options)
+    else:
+        driver = webdriver.Chrome(executable_path='/opt/webdriver/98/chromedriver',options=options)
+
+    driver.delete_all_cookies()
+    driver.set_window_size(1400,1000)
+
+    print('到此')
+    process_query(q)
+    time.sleep(3)
+    driver.quit()
+
+
+#for c in lst:
+#while True:
+#    try:
+#        c=random.choice(lst)
+#    except:
+#        traceback.print_exc()
+#    sleepint=random.randint(320,520)
+#    time.sleep(sleepint)
+
+class JParams(object):
+
+  def get(self, kw,domain,port):
+    print(kw)
+    print(domain)
+    try:
+        run_once( (kw,domain,port)   )
+    except:
+        os.system('docker container restart tiny1')
+        time.sleep(6)
+
+if __name__ == '__main__':
+  fire.Fire(JParams)
+