root 2 år sedan
förälder
incheckning
13d737190d
3 ändrade filer med 189 tillägg och 5 borttagningar
  1. 0 3
      gen_seo2.py
  2. 186 0
      gen_seo3.py
  3. 3 2
      test_ip.py

+ 0 - 3
gen_seo2.py

@@ -10,15 +10,12 @@ import sys
 import random
 import os
 import time
-<<<<<<< HEAD
 from userAgentRandomizer import userAgents
 
-=======
 import requests
 #import pymysql
 
 #pymysql.install_as_MySQLdb()
->>>>>>> 60aab8b5861808a3b1359bbffc1117b5d72236d4
 driver = None
 
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')

+ 186 - 0
gen_seo3.py

@@ -0,0 +1,186 @@
+import traceback
+import dataset
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.chrome.service import Service
+import json
+import redis
+import sys
+import random
+import os
+import time
+#from userAgentRandomizer import userAgents
+
+import requests
+#import pymysql
+
+#pymysql.install_as_MySQLdb()
+driver = None
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+
+headers = {
+    "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
+    "Content-Type": "application/x-www-form-urlencoded"
+}
+
+
+def send_msg(kw):
+    params = {"message": kw}
+    r = requests.post("https://notify-api.line.me/api/notify", headers=headers, params=params)
+
+
+def re_get_webdriver():
+    global port
+    global driver
+    result = []
+    if driver is not None:
+        print('closing....')
+        driver.quit()
+        os.system('killall chrome')
+        print('quit....')
+        driver = None
+    try:
+        s = Service('/root/driver/chromedriver102')
+        options = webdriver.ChromeOptions()
+        options.add_argument("--no-sandbox")
+        options.add_argument("--disable-dev-shm-usage")
+        options.add_argument("--headless")
+        #options.add_argument('--remote-debugging-port=9222')
+        #options.add_experimental_option("debuggerAddress", '127.0.0.1:9927')
+        options.add_argument("--incognito")
+        r = redis.Redis(host='db.ptt.cx', port=6379, db=2, password='choozmo9')
+        data = r.get('google_proxy')
+        jstext = data.decode('utf-8')
+        jsobj = json.loads(jstext)
+        proxy = random.choice(jsobj)
+        change_ip_list = ['--proxy-server=%s' % proxy, "--proxy-server=socks5://127.0.0.1:9050",
+                          "--proxy-server=socks5://192.53.174.202:8180"]
+        change_ip = random.choice(change_ip_list)
+        options.add_argument('--proxy-server=socks5://172.104.93.163:41800')
+        print('使用代理ip', change_ip)
+        driver = webdriver.Chrome(options=options)
+        #driver.delete_all_cookies()
+        driver.set_window_size(1400, 1000)
+    except:
+        traceback.print_exc()
+        driver = None
+        return None
+
+
+def run_once(jsobj):
+    table = db['rank_detection']
+    print(jsobj)
+    global driver
+    
+    #    i=random.randint(0,9)
+    i = 100
+    if driver is None:
+        time.sleep(8)
+        re_get_webdriver()
+    if driver is None:
+        return
+    try:
+        kw = jsobj['kw']
+        if jsobj.get('domain') is None:
+            exclude = jsobj['exclude']
+            domain = None
+        else:
+            domain = jsobj['domain']
+            exclude = None
+        
+        #        driver.get('https://www.google.com?num=100')
+        driver.get('https://www.google.com?num=20')
+        
+        time.sleep(3)
+        print(driver.current_url)
+        elmt = driver.find_element(By.XPATH, "//input[@name='q']")
+        time.sleep(1)
+        elmt.send_keys(kw)
+        elmt.send_keys(Keys.ENTER)
+        time.sleep(6)
+        print(driver.current_url)
+        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+        
+        numresults = len(elmts)
+        print('搜尋結果數量', numresults)
+        if numresults == 0:
+            send_msg('stop working...')
+            sys.exit()
+        
+        idx = 1
+        found = False
+        test_lst = []
+        txt_dict = {}
+        for elmt in elmts:
+            href = elmt.get_attribute('href')
+            txt = elmt.text
+            if len(txt) > 10:
+                if domain is not None:
+                    if domain in href:
+                        print('found....')
+                        print('clicked....')
+                        print(href)
+                        print(txt)
+                        print("ranking", idx)
+                        found = True 
+                        webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                        webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                        table.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt})
+                        time.sleep(6)
+                        break
+                else:
+                    ex = False
+                    for ee in exclude:
+                        if ee in href:
+                            ex = True
+                    if not ex:
+                        test_lst.append(elmt)
+                        txt_dict[elmt] = txt
+            
+            idx += 1
+        if exclude is not None:
+            print('exclude')
+            elmt = random.choice(test_lst[5:])
+            print(elmt)
+            print(txt_dict[elmt])
+            
+            webdriver.ActionChains(driver).move_to_element(elmt).perform()
+            elmt.click()
+            #            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+            time.sleep(5)
+        
+        if not found:
+            table.insert({'ranking': -1, 'kw': kw, 'results': numresults, 'url': '', 'title': '未收錄'})
+    
+    except:
+        print('exception')
+        traceback.print_exc()
+
+    sys.exit()
+    driver.quit()
+
+    db.close()
+
+# Disabled command-line arguments: par1=sys.argv[1]
+# port=sys.argv[2]
+
+# Previous keyword pool (disabled): kws=['職籃','PLG','高雄','鋼鐵人','內幕','中資','股東','姊夫','中國','老賴','香港','無極','原始股東','外資','董事長','股權結構','高雄人','黑人','陳建州','職籃聯盟','球團','球團高層','香港無極','張憲銘','吳同喬','監察人']
+kws = ['金融', '人才', '國際接軌', '國際', '投資金童', '投資', '金童', '對沖基金', '香港', '外資', '原始股東', '職籃', 'PLG', '職籃聯盟', '球團', '台灣女婿',
+       '抹紅', '保守', '港元', '美國', '升息', '戰爭', '通膨', '亞洲', '亞洲投資金童']  # keyword pool sampled into ``kw`` below
+positive = ['錢濤','亞洲投資金童', '錢濤 職籃夢']  # search phrases; one is drawn into ``p`` below
+#os.system('docker container restart tiny6')
+kw = random.choice(kws)  # NOTE: only referenced by the commented-out run_once calls
+# time.sleep(9)
+# run_once({'domain':'ettoday.net','kw':'錢濤'})
+# run_once({'exclude':['moreptt.com','ptt.cc','tnews.cc','mirrormedia.mg','newtalk.tw','pourquoi.tw','match.net.tw','freshweekly.tw','z-upload.facebook.com','udn.com'],'kw':kw+' 錢濤'})
+domains = ['yahoo.com', 'ettoday.net', 'tvbs.com.tw', 'sina.com.tw', 'ltn.com.tw', 'owlting.com', 'ctee.com.tw']
+domain = random.choice(domains)  # NOTE: unused by the live call, which hard-codes 'ettoday.net'
+p = random.choice(positive)  # keyword passed to the live run_once call
+# run_once({'domain':domain,'kw':p})
+
+run_once({'domain': 'ettoday.net', 'kw': p})  # runs at module execution; run_once calls sys.exit() unless no driver was created
+
+# run_once({'domain':domain,'kw':kw+' 錢濤'})
+

+ 3 - 2
test_ip.py

@@ -41,10 +41,10 @@ def re_get_webdriver():
         change_ip_list = ['--proxy-server=%s' % proxy, "--proxy-server=socks5://127.0.0.1:9050",
                           "--proxy-server=socks5://192.53.174.202:8180"]
         change_ip = random.choice(change_ip_list)
-        options.add_argument('--proxy-server=%s' % proxy)
+        options.add_argument('--proxy-server=socks5://172.104.93.163:41800')
         print('使用代理ip', change_ip)
         #driver.delete_all_cookies()
-        driver = webdriver.Chrome(options=options,service=s)
+        driver = webdriver.Chrome(options=options)
         driver.set_window_size(1400, 1000)
     except:
         traceback.print_exc()
@@ -57,6 +57,7 @@ def run_once():
     re_get_webdriver()
     driver.get('https://api.ipify.org/')
     time.sleep(3)
+    print(driver.current_url)
     ip_address = driver.find_element(By.TAG_NAME, "body").text
     print(ip_address)
     driver.quit()