root 2 år sedan
förälder
incheckning
0ebe1b722c

+ 12 - 12
INNNews/general_clickbot.py

@@ -82,19 +82,20 @@ def process_query(qs):
     for elmt in elmts:
         href=elmt.get_attribute('href')
         txt=elmt.text
-        if len(txt)>10:
-            if domain in href:
-                print('clicked....')
-                print('點擊網址',href)
-                print('標題',txt)
-                print("ranking", idx)
-                table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
-                webdriver.ActionChains(driver).move_to_element(elmt).perform()
-                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-                time.sleep(5)
-                break
+        if domain in href:
+            print('clicked....')
+            print('點擊網址',href)
+            print('標題',txt)
+            print("ranking", idx)
+            table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
+            webdriver.ActionChains(driver).move_to_element(elmt).perform()
+            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+            time.sleep(5)
+            break
         idx+=1
     db.close()
+    driver.quit()
+
 def run_once(q):
     global driver
     result=[]
@@ -115,7 +116,6 @@ def run_once(q):
 
     process_query(q)
     time.sleep(3)
-    driver.quit()
 
 
 #for c in lst:

+ 29 - 44
INNNews/general_clickbot_proxy.py

@@ -54,7 +54,7 @@ def process_query(qs):
     googleurl = 'https://www.google.com/?num=100'
     driver.get(googleurl)
     time.sleep(6)
-    send_kw_elmt = driver.find_element(By.XPATH, '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
+    send_kw_elmt = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')))
     send_kw_elmt.send_keys(q)
     time.sleep(3)
     send_kw_elmt.send_keys(Keys.ENTER)
@@ -76,24 +76,30 @@ def process_query(qs):
         table.insert(
             {'kw': q, 'domain': domain, 'ranking': idx, 'title': txt, 'url': href, 'dt': datetime.datetime.now()})
         webdriver.ActionChains(driver).move_to_element(elmts[0]).perform()
+        time.sleep(3)
         webdriver.ActionChains(driver).move_to_element(elmts[0]).click().perform()
         time.sleep(5)
-    for elmt in elmts:
-        href=elmt.get_attribute('href')
-        txt=elmt.text
-        if len(txt)>10:
-            if domain in href:
-                print('clicked....')
-                print('點擊網址',href)
-                print('標題',txt)
-                print("ranking", idx)
-                table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
-                webdriver.ActionChains(driver).move_to_element(elmt).perform()
-                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
-                time.sleep(5)
-                break
-        idx+=1
+    else:
+        for elmt in elmts:
+            href=elmt.get_attribute('href')
+            txt=elmt.text
+            if len(txt)>10:
+                if domain in href:
+                    print('clicked....')
+                    print('點擊網址',href)
+                    print('標題',txt)
+                    print("ranking", idx)
+                    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                    time.sleep(3)
+                    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                    table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
+                    time.sleep(5)
+                    break
+            idx+=1
     db.close()
+    print('資料庫關閉')
+    driver.quit()
+
 def run_once(q):
     global driver
     result=[]
@@ -102,42 +108,27 @@ def run_once(q):
     options = webdriver.ChromeOptions()
     options.add_argument('--headless')
     options.add_argument('--remote-debugging-port=9222')
-    options.add_experimental_option("debuggerAddress", "127.0.0.1:9922")
+    options.add_experimental_option("debuggerAddress",f"127.0.0.1:{q[2]}")
     # options.add_argument("--user-agent=" +user_agent)
     options.add_argument("--incognito")
-<<<<<<< HEAD
     r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
     data=r.get('google_proxy')
     jstext=data.decode('utf-8')
     jsobj=json.loads(jstext)
+    #print('Free proxy',jsobj)
     proxy=random.choice(jsobj)
-    print('Freeproxy',proxy)
-    change_ip = ["'--proxy-server='+proxy","--proxy-server=socks5://127.0.0.1:9050","--proxy-server=socks5://192.53.174.202:8180"]
-    options.add_argument('--proxy-server=socks5://192.53.174.202:8180')
-    driver = webdriver.Chrome(
-    options=options,service=s)
-=======
-    if 'sorry' in driver.current_url:
-        r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
-        data=r.get('google_proxy')
-        jstext=data.decode('utf-8')
-        jsobj=json.loads(jstext)
-        print('Free proxy',jsobj)
-        proxy=random.choice(jsobj)
-        change_ip_list = ['--proxy-server=%s' % proxy,"--proxy-server=socks5://127.0.0.1:9050","--proxy-server=socks5://192.53.174.202:8180"]
-        change_ip = random.choice(change_ip_list)
-        options.add_argument(change_ip)
-        print('使用代理ip',change_ip)
+    i5 = "--proxy-server=socks5://172.104.93.163:41800"
+    change_ip_list = ['--proxy-server=%s' % proxy,"--proxy-server=socks5://127.0.0.1:9050","--proxy-server=socks5://192.53.174.202:8180"]
+    change_ip = random.choice(change_ip_list)
+    options.add_argument(i5)
+    #print('使用代理ip',change_ip)
         
     driver = webdriver.Chrome(options=options,service=s)
->>>>>>> 28b37f8f0a95c485aaf3da04362d35694cb233c9
 
     driver.delete_all_cookies()
     driver.set_window_size(1400,1000)
 
     process_query(q)
-    time.sleep(3)
-    driver.quit()
 
 
 #for c in lst:
@@ -152,13 +143,7 @@ def run_once(q):
 class JParams(object):
 
   def get(self, kw,domain,port):
-<<<<<<< HEAD
-    os.system('docker container restart tiny1')
-    time.sleep(1)
-    run_once( ('台北 禮儀 社','sctt.com.tw','tiny1')   )
-=======
     run_once( (kw,domain,port) )
->>>>>>> 28b37f8f0a95c485aaf3da04362d35694cb233c9
 
 
 if __name__ == '__main__':

+ 157 - 0
INNNews/general_clickbot_proxy.py.save

@@ -0,0 +1,157 @@
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import datetime
+import dataset
+import time
+import traceback
+import sys
+import fire
+import redis
+
+
+# Module-level Selenium driver handle: run_once() assigns it and the query
+# helpers access it through `global driver`.
+driver = None
+
+
+def rua():
+    """Return a randomly chosen desktop browser User-Agent string.
+
+    Every pool entry is one complete User-Agent value. Each entry needs its
+    own trailing comma: without it Python concatenates adjacent string
+    literals and yields a single malformed value made of two User-Agents.
+    """
+    pool = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
+    ]
+    return random.choice(pool)
+
+
+def empty_query(q):
+    """Load the Google results page for the URL-quoted query *q*, then wait 3 seconds."""
+    global driver
+    encoded = urllib.parse.quote(q)
+    driver.get('https://www.google.com/search?q=' + encoded)
+    time.sleep(3)
+
+
+def process_query(qs):
+    """Search Google for a keyword, click the target result and log the click.
+
+    Args:
+        qs: Sequence whose first item is the search keyword and whose second
+            item is the domain a result URL must contain to be clicked.
+
+    When the keyword contains ``site`` the first result is clicked directly.
+    Otherwise the results are scanned in order and the first one whose title
+    is longer than 10 characters and whose URL contains the domain is
+    clicked. Each click is inserted into the ``general_log`` table. If the
+    results page has no matching anchors the function returns without
+    clicking.
+    """
+    global driver
+    # NOTE(review): the connection string embeds credentials; consider
+    # loading it from configuration instead.
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    try:
+        table=db['general_log']
+        q=qs[0]
+        domain=qs[1]
+        googleurl = 'https://www.google.com/?num=100'
+        driver.get(googleurl)
+        time.sleep(6)
+        send_kw_elmt = driver.find_element(By.XPATH, '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
+        send_kw_elmt.send_keys(q)
+        time.sleep(3)
+        send_kw_elmt.send_keys(Keys.ENTER)
+        time.sleep(6)
+        print(driver.current_url)
+        elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
+
+        print('網頁數量',len(elmts))
+        if not elmts:
+            # No result anchors: nothing to click, and elmts[0] would raise.
+            return
+
+        if 'site' in q:
+            idx=1
+            href = elmts[0].get_attribute('href')
+            txt = elmts[0].text
+            print('clicked....')
+            print(href)
+            print(txt)
+            print("ranking", idx)
+            table.insert(
+                {'kw': q, 'domain': domain, 'ranking': idx, 'title': txt, 'url': href, 'dt': datetime.datetime.now()})
+            webdriver.ActionChains(driver).move_to_element(elmts[0]).perform()
+            webdriver.ActionChains(driver).move_to_element(elmts[0]).click().perform()
+            time.sleep(5)
+        else:
+            # Only scan when nothing was clicked above: after a click the
+            # browser has navigated away and the collected elements no longer
+            # belong to the current page.
+            for idx, elmt in enumerate(elmts, start=1):
+                href=elmt.get_attribute('href')
+                txt=elmt.text
+                # href can be None for anchors without the attribute.
+                if href and len(txt)>10 and domain in href:
+                    print('clicked....')
+                    print('點擊網址',href)
+                    print('標題',txt)
+                    print("ranking", idx)
+                    table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
+                    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                    time.sleep(5)
+                    break
+    finally:
+        # Release the DB connection even when Selenium raises.
+        db.close()
+def run_once(q):
+    """Create a Chrome driver with proxy options, run process_query(q), then quit.
+
+    Args:
+        q: Passed unchanged to process_query().
+    """
+    global driver
+    # `result` is not read anywhere in this function.
+    result=[]
+    s = Service('/root/driver/chromedriver')
+    # Only referenced by the commented-out --user-agent option below.
+    user_agent = rua()
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    options.add_argument('--remote-debugging-port=9222')
+    options.add_experimental_option("debuggerAddress", "127.0.0.1:9922")
+    # options.add_argument("--user-agent=" +user_agent)
+    options.add_argument("--incognito")
+    # Read the 'google_proxy' key from Redis, decode it as UTF-8 JSON and
+    # pick one entry at random.
+    r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
+    data=r.get('google_proxy')
+    jstext=data.decode('utf-8')
+    jsobj=json.loads(jstext)
+    proxy=random.choice(jsobj)
+    print('Freeproxy',proxy)
+    # NOTE(review): this list is never read before the conditional below
+    # rebinds `change_ip`; its first item is the literal text
+    # "'--proxy-server='+proxy", not the concatenated proxy value.
+    change_ip = ["'--proxy-server='+proxy","--proxy-server=socks5://127.0.0.1:9050","--proxy-server=socks5://192.53.174.202:8180"]
+    options.add_argument('--proxy-server=socks5://192.53.174.202:8180')
+    # NOTE(review): `driver` is rebound to a second webdriver.Chrome further
+    # down; this first instance is never quit in this function.
+    driver = webdriver.Chrome(
+    options=options,service=s)
+    # Presumably 'sorry' in the URL means Google served its block page --
+    # TODO confirm. In that case another proxy argument is added.
+    if 'sorry' in driver.current_url:
+        r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
+        data=r.get('google_proxy')
+        jstext=data.decode('utf-8')
+        jsobj=json.loads(jstext)
+        print('Free proxy',jsobj)
+        proxy=random.choice(jsobj)
+        change_ip_list = ['--proxy-server=%s' % proxy,"--proxy-server=socks5://127.0.0.1:9050","--proxy-server=socks5://192.53.174.202:8180"]
+        change_ip = random.choice(change_ip_list)
+        options.add_argument(change_ip)
+        print('使用代理ip',change_ip)
+        
+    driver = webdriver.Chrome(options=options,service=s)
+
+    driver.delete_all_cookies()
+    driver.set_window_size(1400,1000)
+
+    process_query(q)
+    time.sleep(3)
+    # Not reached if process_query() raises.
+    driver.quit()
+
+
+#for c in lst:
+#while True:
+#    try:
+#        c=random.choice(lst)
+#    except:
+#        traceback.print_exc()
+#    sleepint=random.randint(320,520)
+#    time.sleep(sleepint)
+
+class JParams(object):
+  """Holder for the command-line entry point."""
+
+  def get(self, kw,domain,port):
+    """Run one click session for the (kw, domain, port) triple."""
+    query = (kw, domain, port)
+    run_once(query)
+
+
+# When run as a script, hand the JParams class to fire.Fire.
+if __name__ == '__main__':
+  fire.Fire(JParams)
+

+ 25 - 10
INNNews/run_sheet_2.py

@@ -22,23 +22,23 @@ def run_once(pport, dockername):
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
     lst = []
     
-    cursor = db.query('select term, domain from seo.selected_kw where client not in ("毛怪","火柴星人")')
+    cursor = db.query('select term, domain from seo.selected_kw where client not in ("毛怪","hhh")')
     for c in cursor:
         lst.append([c['term'], c['domain']])
-    
+    #lst2=[['台灣仁本生命服務團隊','sctt.com.tw']]
     obj = random.choice(lst)
     kw = obj[0]
     domain = obj[1]
     print(kw, domain)
+    db.close()
     
-    intval = os.system(f'python3 general_clickbot.py get --kw="{kw}" --domain="{domain}" --port="{str(pport)}"')
+    intval = os.system(f'python3 general_clickbot_proxy.py get --kw="{kw}" --domain="{domain}" --port="{str(pport)}"')
     
     print('執行完成genetal_clickbot')
     
     if intval == -1:
         print('-1')
         sys.exit()
-    db.close()
 
 def run_specific(pport, dockername):
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
@@ -47,19 +47,34 @@ def run_specific(pport, dockername):
     cursor = db.query('select term, domain from seo.selected_kw where client="神助物流"')
     for c in cursor:
         lst.append([c['term'], c['domain']])
-    
+    db.close()
+
     obj = random.choice(lst)
-    kw = obj[0]
+    print('隨機',obj)
+    #obj = ['貨架','gs-rack.com']
+    if obj[0]=='重型 架':
+        lst_1 = ['重型架 無軌式重型移動','重型架 無軌移動貨架','重型 架 神助物流']
+        kw = random.choice(lst_1)
+    elif obj[0]=='貨架':
+        lst_2=['貨架 無軌式重型移動','貨架 無軌式重型移動貨架','貨架 無軌移動貨架儲存方案']
+        kw = random.choice(lst_2)
+    #elif obj[0]=='倉儲 設備':
+    #    kw = '倉儲 設備'
+    #    print(kw)
+    else:
+        lst_3=[]
+        lst_3.append(obj[0]+' 神助物流')
+        lst_3.append(obj[0])
+        kw = random.choice(lst_3)
     domain = obj[1]
-    print(kw,domain)
+    print('執行的關鍵字',kw,domain)
 
-    intval = os.system(f'python3 general_clickbot.py get --kw="{kw}" --domain="{domain}" --port="{str(pport)}"')    
+    intval = os.system(f'python3 general_clickbot_proxy.py get --kw="{kw}" --domain="{domain}" --port="{str(pport)}"')    
     print('執行完成神助genetal_clickbot')
 
     if intval == -1:
         print('-1')
         sys.exit()
-    db.close()
 
 class JParams(object):
     
@@ -80,7 +95,7 @@ class JParams(object):
                 time.sleep(20)
                 break           
             except:
-                os.system('docker container restart tiny1')
+                os.system('docker container restart tiny8')
                 time.sleep(15)
 
 

+ 103 - 0
INNNews/run_sheet_3.py

@@ -0,0 +1,103 @@
+import sys
+import codecs
+import traceback
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+import redis
+import time
+import fire
+import dataset
+
+
+def run_once(pport, dockername):
+    """Pick a random keyword/domain pair and run the click bot on it.
+
+    Rows are read from ``seo.selected_kw`` (excluding two clients), one is
+    chosen at random, and ``general_clickbot_proxy.py get`` is run with it.
+
+    Args:
+        pport: Forwarded to the click bot as ``--port``.
+        dockername: Not used by this function.
+
+    Exits the interpreter (``sys.exit``) when the child process cannot be
+    started, matching the old ``os.system(...) == -1`` handling.
+    """
+    import subprocess
+
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    try:
+        cursor = db.query('select term, domain from seo.selected_kw where client not in ("毛怪","hhh")')
+        lst = [[c['term'], c['domain']] for c in cursor]
+    finally:
+        # Close the connection even if the query fails.
+        db.close()
+
+    if not lst:
+        # random.choice() raises IndexError on an empty list.
+        print('no keywords found')
+        return
+
+    kw, domain = random.choice(lst)
+    print(kw, domain)
+
+    # Pass the arguments as a list (no shell) so quotes or shell
+    # metacharacters inside a keyword cannot break or alter the command.
+    cmd = ['python3', 'general_clickbot_proxy.py', 'get',
+           f'--kw={kw}', f'--domain={domain}', f'--port={pport}']
+    try:
+        subprocess.run(cmd)
+        started = True
+    except OSError:
+        started = False
+
+    print('執行完成genetal_clickbot')
+
+    if not started:
+        print('-1')
+        sys.exit()
+
+def run_specific(pport, dockername):
+    """Run the click bot on a keyword variant for the client "神助物流".
+
+    A random row is chosen from ``seo.selected_kw`` for that client. Two
+    terms map to fixed lists of search variants; any other term is searched
+    either as-is or with the client name appended, chosen at random.
+
+    Args:
+        pport: Forwarded to the click bot as ``--port``.
+        dockername: Not used by this function.
+
+    Exits the interpreter (``sys.exit``) when the child process cannot be
+    started, matching the old ``os.system(...) == -1`` handling.
+    """
+    import subprocess
+
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    try:
+        cursor = db.query('select term, domain from seo.selected_kw where client="神助物流"')
+        lst = [[c['term'], c['domain']] for c in cursor]
+    finally:
+        # Close the connection even if the query fails.
+        db.close()
+
+    if not lst:
+        # random.choice() raises IndexError on an empty list.
+        print('no keywords found')
+        return
+
+    obj = random.choice(lst)
+    print('隨機',obj)
+    if obj[0]=='重型 架':
+        kw = random.choice(['重型架 無軌式重型移動','重型架 無軌移動貨架','重型 架 神助物流'])
+    elif obj[0]=='貨架':
+        kw = random.choice(['貨架 無軌式重型移動','貨架 無軌式重型移動貨架','貨架 無軌移動貨架儲存方案'])
+    else:
+        kw = random.choice([obj[0]+' 神助物流', obj[0]])
+    domain = obj[1]
+    print('執行的關鍵字',kw,domain)
+
+    # Pass the arguments as a list (no shell) so quotes or shell
+    # metacharacters inside a keyword cannot break or alter the command.
+    cmd = ['python3', 'general_clickbot_proxy.py', 'get',
+           f'--kw={kw}', f'--domain={domain}', f'--port={pport}']
+    try:
+        subprocess.run(cmd)
+        started = True
+    except OSError:
+        started = False
+
+    print('執行完成神助genetal_clickbot')
+
+    if not started:
+        print('-1')
+        sys.exit()
+
+class JParams(object):
+    """Holder for the command-line entry point."""
+
+    def get(self, port=9222):
+        """Restart the browser containers and run one round of click jobs.
+
+        If any step raises an ordinary exception, the traceback is printed,
+        ``tiny8`` is restarted and the whole round is retried. The method
+        returns after the first round that completes.
+
+        Args:
+            port: Not used by this method.
+        """
+        while True:
+            try:
+                for name in ('tiny1', 'tiny2', 'tiny3', 'tiny8'):
+                    os.system(f'docker container restart {name}')
+                time.sleep(1)
+                run_once(9922, 'tiny1')
+                run_once(9923, 'tiny2')
+                run_once(9924, 'tiny3')
+                run_specific(9929, 'tiny8')
+                time.sleep(20)
+                break
+            except Exception:
+                # Catch Exception rather than everything: a bare except also
+                # swallows the SystemExit raised by sys.exit() in the job
+                # functions and KeyboardInterrupt, making the loop unstoppable.
+                traceback.print_exc()
+                os.system('docker container restart tiny8')
+                time.sleep(15)
+
+
+# When run as a script, hand the JParams class to fire.Fire.
+if __name__ == '__main__':
+    fire.Fire(JParams)