浏览代码

Merge remote-tracking branch 'origin/master'

zooey 2 年之前
父节点
当前提交
fdc96bae52

+ 287 - 0
oak_u/etoday_use.py

@@ -0,0 +1,287 @@
+#import redis
+import time
+import traceback
+#import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import urllib
+import os
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+from selenium.webdriver.common.keys import Keys
+import json
+import random
+import time
+import redis
+import sys
+import codecs
+import random
+import os
+import time
+import requests
+import datetime
+driver=None
+from fake_useragent import UserAgent
+
+ua = UserAgent()
+#proxy_enabled=True
+# proxy_enabled=False
+
+# # https://youtu.be/cR2M5Khgxvc
+
+# db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+# glog_table=db['general_log']
+
+def re_get_webdriver():
+    # Open a new headless, incognito Chrome session on the Selenium server at 127.0.0.1:<portnum>, store it in the global `driver`, and return it.
+    global driver
+    global portnum  # only read here; assigned by the module-level script and by run_once()
+    # os.system('killall chrome')
+    result=[]  # never read in this function
+    # if driver is not None:
+    #     print('closing....')
+    #     driver.quit()
+    #     print('quit....')
+    #     driver=None
+    # os.system()
+    options = webdriver.ChromeOptions()
+    options.add_argument("--user-agent=" +ua.random)    # user-agent string taken from the module-level UserAgent() instance
+    options.add_argument("--no-sandbox")
+    options.add_argument("--headless")
+    options.add_argument("--incognito")
+    driver = webdriver.Remote(  # run_once() wraps this call in try/except and restarts the container when it fails
+                command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
+            options=options)
+    return driver
+    # try:
+    #     options = webdriver.ChromeOptions()
+        
+    #     options.add_argument("--no-sandbox")
+    #     options.add_argument("--headless")
+    #     options.add_argument("--incognito")
+    #     # if proxy_enabled:
+    #     #     options.add_argument('--proxy-server=socks5://172.104.92.245:14900')
+
+    #     try:
+    #         driver = webdriver.Remote(
+    #             command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
+    #         options=options)
+    #     except:
+    #         traceback.print_exc()
+    #         return None
+    #     return driver
+    # except:
+    #     traceback.print_exc()
+    #     driver=None
+    #     return None
+    # return driver
+
+
+def run_once():  # Visit each hard-coded article URL once, each in its own browser session, and click one link on the page.
+    global count  # successful clicks; reset to 0 whenever the container is restarted below
+    global portnum  # reassigned below when the container is restarted
+    global bok  # number of container restarts triggered from this function
+    # global glog_table
+    # table=db['nda_log']
+    # print(jsobj)
+    # kw=jsobj['kw']
+
+    
+    # options = webdriver.ChromeOptions()
+    
+    # options.add_argument("--no-sandbox")
+    # options.add_argument("--headless")
+    # options.add_argument("--incognito")
+    # driver = webdriver.Remote(
+    #         command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
+    #     options=options)
+    
+        # if driver is not None:
+        #     break
+    
+    
+    ettoday_url_list = ['https://house.ettoday.net/news/1492047',  # fixed list of pages visited on every call
+        'https://house.ettoday.net/news/1492167',
+        'https://house.ettoday.net/news/1492288',
+        'https://house.ettoday.net/news/1492178',
+        'https://house.ettoday.net/news/1492229',
+        'https://house.ettoday.net/news/1492134',
+        'https://house.ettoday.net/news/1492240',
+        'https://house.ettoday.net/news/1492161',
+        'https://house.ettoday.net/news/1492168',
+        'https://house.ettoday.net/news/1492217']
+    
+    # try:        
+    for i in ettoday_url_list:
+        try:
+            driver=re_get_webdriver()  # `driver` is a local of run_once (no `global driver` here)
+        except:  # bare except: any failure to open a session triggers a container restart
+            portnum=random.randint(4555,4666)  # new host port, both bounds inclusive
+            print(portnum)
+            os.system('docker container stop p8816')
+            time.sleep(5)
+            os.system('docker container rm p8816')
+            time.sleep(5)
+            os.system('docker run -d -p '+str(portnum)+':4444 --name p8816 --dns 168.95.1.1 selenium/standalone-chrome:101.0')  # publish container port 4444 on the new host port
+            bok += 1
+            count=0
+            time.sleep(5)
+            driver=re_get_webdriver()  # second attempt is unguarded; an exception here propagates out of run_once()
+        time.sleep(3)
+        try:
+            driver.get(i)
+            time.sleep(3)
+            elmt_next = driver.find_element(By.XPATH, '//*[@id="house"]/div[3]/div[2]/div[6]/div/div/div[1]/article/div/div[3]/p[1]/a')  # position-based XPath; tied to the current page layout
+
+            webdriver.ActionChains(driver).move_to_element(elmt_next).perform()  # hover over the link first
+            time.sleep(3)
+
+            webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()  # then move to it again and click
+            print("cick!",i)
+            count+=1
+            print("count_time:",count,';borken_time:',bok)
+        # elmt = driver.find_element(By.XPATH, '//*[@id="yschsp"]')
+            time.sleep(random.randint(3,7))
+            driver.quit()
+        except:  # bare except: any error during navigation or click is reported and the loop moves to the next URL
+            driver.quit()  # NOTE(review): may itself raise if the session is already gone, which would escape run_once() — confirm
+            print("wrong",i,';borken_time:',bok)
+            time.sleep(5)
+    # except:
+    #     print('wrong for:',i)
+        # kw=jsobj['kw']
+        # if jsobj.get('domain') is None:
+        #     exclude=jsobj['exclude']
+        #     domain=None
+        # else:
+        #     domain=jsobj['domain']
+        #     exclude=None
+#         driver.get('https://www.google.com?num=100')
+#         time.sleep(17)
+#         while True:
+#             try:
+#                 print(driver.current_url)
+#                 break
+#             except:
+#                 traceback.print_exc()
+#                 driver=re_get_webdriver()
+#                 time.sleep(3)
+#                 driver.get('https://www.google.com?num=100')
+#                 time.sleep(3)
+
+#             time.sleep(3)
+
+#         elmt = driver.find_element(By.XPATH, "//input[@name='q']")
+#         time.sleep(1)
+#         elmt.send_keys(kw)
+#         elmt.send_keys(Keys.ENTER)
+#         time.sleep(6)
+
+#         elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
+
+#         numresults=len(elmts)
+# #        time.sleep(9999)
+#         print('搜尋結果數量',numresults)
+#         if numresults==0:
+#             print(driver.current_url)
+#             print(driver.title)
+#             sys.exit()
+
+#         idx=1
+#         found=False
+#         test_lst=[]
+#         for elmt in elmts:
+#             href=elmt.get_attribute('href')
+#             txt=elmt.text
+#             if len(txt)>10:
+#                 if domain is not None:
+#                     for d in domain:
+#                         if d in href:
+#                             print('found....')
+#                             print('clicked....')
+#                             print(href)
+#                             print(txt)
+#                             print("ranking", idx)
+#                             found=True
+
+#                             webdriver.ActionChains(driver).move_to_element(elmt).perform()
+# #                            elmt.click()
+#                             webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+#                             table.insert({'kw':kw,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now(),'result':numresults,'client':'64G'})
+#                             time.sleep(6)
+
+#                             return
+#                 else:
+#                     if exclude not in href:
+#                         test_lst.append(elmt)
+                    
+#             idx+=1
+#         if exclude is not None:
+#             print('exclude')
+#             elmt=random.choice(test_lst)
+#             print(elmt)
+
+#             webdriver.ActionChains(driver).move_to_element(elmt).perform()
+#             webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+#             time.sleep(5)
+
+#         if not found:
+#             table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄'})
+
+#     except:
+#         traceback.print_exc()
+
+#         print('exception')
+        # traceback.print_exc()
+        
+
+
+# time.sleep(5)
+
+# r=random.randint(0,27)
+# r=26
+
+# cursor=db.query('select json from seo_jobs where cust="KNIGHT" and plan="形象SEO" order by rand() limit 1')
+# for c in cursor:
+#     js=json.loads(c['json'])
+#     prefix=js['prefix']
+#     postfix=js['postfix']
+#     domain=js['domain'][0]
+#     positive=js['positive']
+#     rnd=js['rnd']
+
+
+portnum=random.randint(4555,4666)  # host port for the Selenium container, both bounds inclusive
+print(portnum)
+os.system('docker container stop p8816')  # stop and remove any earlier container with this name before starting a new one
+time.sleep(5)
+os.system('docker container rm p8816')
+time.sleep(5)
+os.system('docker run -d -p '+str(portnum)+':4444 --name p8816 --dns 168.95.1.1 selenium/standalone-chrome:101.0')  # publish container port 4444 on the chosen host port
+bok = 0  # container restarts triggered inside run_once()
+count=0  # successful clicks since the last restart
+time.sleep(5)
+while True:  # no exit condition: the URL list is replayed until the process is stopped or run_once() raises
+    # run_once()
+    # time.sleep(10)
+
+    run_once()
+
+
+
+
+# kw=random.choice(positive)
+# kw2=random.choice(rnd)
+
+# count=0
+# while True:
+#     try:
+#         run_once({'domain':domain,'kw':prefix+" "+kw+" "+kw2})
+#         count+=1
+#     except:
+#         continue
+#     print('中場休息 次數',count)
+#     time.sleep(random.randint(120,150))

+ 1 - 0
oak_u/readme.md

@@ -0,0 +1 @@
+記得要修改chrome版本和docker名稱才能同時執行多個,另外雖然我有設定重啟,但時間長了還是會發生錯誤,必須重啟(暫時還不知道原因)

+ 377 - 0
oak_u/yahoo_use.py

@@ -0,0 +1,377 @@
+import time
+from datetime import datetime
+import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import codecs
+import random
+import requests
+import dataset
+import traceback
+import sys
+from selenium.webdriver.common.keys import Keys
+import timeit
+import socket
+
+import random 
+import re
+
+
+# import requests
+
+
+
+from fake_useragent import UserAgent
+
+ua = UserAgent()
+def re_get_webdriver():
+    # Open a new headless, incognito Chrome session on the Selenium server at 127.0.0.1:<portnum>, store it in the global `driver`, and return it.
+    global driver
+    global portnum  # only read here; assigned by the module-level script and by run_once()
+    # os.system('killall chrome')
+    result=[]  # never read in this function
+    # if driver is not None:
+    #     print('closing....')
+    #     driver.quit()
+    #     print('quit....')
+    #     driver=None
+    # os.system()
+    options = webdriver.ChromeOptions()
+    options.add_argument("--user-agent=" +ua.random)    # user-agent string taken from the module-level UserAgent() instance
+    options.add_argument("--no-sandbox")
+    options.add_argument("--headless")
+    options.add_argument("--incognito")
+    driver = webdriver.Remote(  # run_once() wraps this call in try/except and restarts the container when it fails
+                command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
+            options=options)
+    return driver
+# headers = {'user-agent': ua.chrome}
+# r = requests.get('https://house.ettoday.net/news/1492047', headers=headers)
+# print(r.text)
+
+
+# options.binary_location = ('C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe')
+# driverPath = './chromedriver.exe'
+
+# driver = webdriver.Firefox()
+# driver.get('https://google.com')
+
+# ettoday_url_list = ['https://house.ettoday.net/news/1492047',
+# 'https://house.ettoday.net/news/1492167',
+# 'https://house.ettoday.net/news/1492288',
+# 'https://house.ettoday.net/news/1492178',
+# 'https://house.ettoday.net/news/1492229',
+# 'https://house.ettoday.net/news/1492134',
+# 'https://house.ettoday.net/news/1492240',
+# 'https://house.ettoday.net/news/1492161',
+# 'https://house.ettoday.net/news/1492168',
+# 'https://house.ettoday.net/news/1492217']
+# for i in ettoday_url_list:
+    
+#     driver.get(i)
+#     time.sleep(3)
+#     elmt_next = driver.find_element(By.XPATH, '//*[@id="house"]/div[3]/div[2]/div[6]/div/div/div[1]/article/div/div[3]/p[1]/a')
+
+#     webdriver.ActionChains(driver).move_to_element(elmt_next).perform()
+#     webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()
+#     print("cick:",i)
+# # elmt = driver.find_element(By.XPATH, '//*[@id="yschsp"]')
+#     time.sleep(random.randint(3,7))
+#     driver.quit()
+
+# query='幸福空間'
+# elmt.send_keys(query)
+# elmt.send_keys(Keys.ENTER)
+# time.sleep(1)
+
+# time.sleep(1)
+
+def run_once():  # Visit each hard-coded article URL once, each in its own browser session, and click one link on the page.
+    global count  # successful clicks; reset to 0 whenever the container is restarted below
+    global bok  # number of container restarts triggered from this function
+    global portnum  # reassigned below when the container is restarted
+    yahoo_url_list = [  # fixed list of pages visited on every call
+    'https://house.yahoo.com.tw/%E9%9B%8D%E5%AE%B9%E9%9B%85%E7%B7%BB-%E5%84%AA%E9%9B%85%E5%81%87%E6%9C%9F-%E6%96%B0%E5%8F%A4%E5%85%B8-31%E5%9D%AA-020000499.html',
+    'https://house.yahoo.com.tw/%E6%96%B0%E7%94%9F%E9%AD%85%E5%8A%9B-%E8%AD%9C%E5%AF%AB%E5%B9%B8%E7%A6%8F%E5%9C%93%E8%88%9E%E6%9B%B2-%E5%8C%97%E6%AD%90%E9%A2%A8-35%E5%9D%AA-020000759.html',
+    'https://house.yahoo.com.tw/20%E5%B9%B4%E8%80%81%E5%AE%85%E9%87%8D%E7%94%9F-%E7%BE%8E%E5%BC%8F%E4%BD%8E%E5%A5%A2%E6%9C%89%E5%AE%B6%E7%9A%84%E6%BA%AB%E5%BA%A6-106%E5%9D%AA-020000087.html',
+    'https://house.yahoo.com.tw/sheer-%E7%B4%94%E7%B2%B9-%E7%8F%BE%E4%BB%A3%E9%A2%A8-25%E5%9D%AA-020000325.html',
+    'https://house.yahoo.com.tw/%E8%AE%8A%E5%BD%A2%E8%88%87%E7%B5%84%E5%90%88-%E8%A4%87%E5%90%88%E5%BC%8F%E7%9A%84%E7%A9%BA%E9%96%93%E8%A8%AD%E8%A8%88-%E4%B8%AD-020000869.html',
+    'https://house.yahoo.com.tw/%E8%A7%A3%E6%94%BE%E6%8B%98%E7%A6%81%E5%BF%83%E9%9D%88-%E8%B6%85%E8%84%AB%E7%8B%82%E6%83%B3%E9%80%8F%E5%A4%A9%E5%8E%9D-020000093.html',
+    'https://house.yahoo.com.tw/%E8%A6%AA%E5%AD%90%E6%96%99%E7%90%86%E7%9B%B4%E6%92%AD%E4%B8%BB%E7%9A%84%E5%AE%B6-%E5%BE%AE%E7%BE%8E%E5%BC%8F%E8%A8%AD%E8%A8%88-50%E5%9D%AA-020000607.html',
+    'https://house.yahoo.com.tw/%E5%82%B3%E9%81%94%E6%B7%B1%E8%89%B2%E6%BA%AB%E5%BA%A6-%E8%8B%B1%E5%80%AB%E7%B4%B3%E5%A3%AB%E8%B2%B4%E6%97%8F%E9%A2%A8-%E7%8F%BE%E4%BB%A3%E5%A5%A2%E8%8F%AF%E9%A2%A8-020000334.html',
+    'https://house.yahoo.com.tw/%E7%8E%A9%E5%91%B3%E7%B3%BB%E7%B5%B1%E6%9D%BF-%E5%BF%AB%E9%80%9F%E6%88%90%E5%AE%B6%E7%B0%A1%E7%B4%84%E7%8F%BE%E4%BB%A3%E9%A2%A8-35%E5%9D%AA-020000199.html',
+    'https://house.yahoo.com.tw/%E4%BB%A5%E5%9C%93%E5%BD%A2%E7%AC%A6%E7%A2%BC-%E5%BD%A2%E5%A1%91%E6%81%A2%E5%BC%98%E5%A5%A2%E7%BE%8E%E8%87%BB%E9%82%B8-%E5%A5%A2%E8%8F%AF%E9%A2%A8-42%E5%9D%AA-020000780.html']
+    
+    
+    for i in yahoo_url_list:
+        try:  # outer guard: covers session creation, navigation and the click for one URL
+            try:
+                driver = re_get_webdriver()  # `driver` is a local of run_once (no `global driver` here)
+            except:  # bare except: any failure to open a session triggers a container restart
+                print('driver_bok')
+                portnum=random.randint(4555,4666)  # new host port, both bounds inclusive
+                print(portnum)
+                os.system('docker container stop p8809')
+                time.sleep(5)
+                os.system('docker container rm p8809')
+                time.sleep(5)
+                os.system('docker run -d -p '+str(portnum)+':4444 --name p8809 --dns 168.95.1.1 selenium/standalone-chrome:106.0')  # publish container port 4444 on the new host port
+                count=0
+                bok+=1
+                time.sleep(5)
+                driver = re_get_webdriver()  # a failure here is caught by the outer except below
+
+            driver.get(i)
+            time.sleep(5)
+            elmt_next = driver.find_element(By.XPATH, '//*[@id="maincontainer"]/main/div/div[2]/div[1]/div[1]/div[1]/div[1]/div/div/div[1]/a')  # position-based XPath; tied to the current page layout
+
+            webdriver.ActionChains(driver).move_to_element(elmt_next).perform()  # hover over the link first
+            webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()  # then move to it again and click
+            print("cick!")
+            count+=1
+            print('click_all_time:',count,';broken_time:',bok)
+        # elmt = driver.find_element(By.XPATH, '//*[@id="yschsp"]')
+            time.sleep(random.randint(3,7))
+            driver.quit()
+        except:  # bare except: any error for this URL is reported and the loop moves to the next URL
+            driver.quit()  # NOTE(review): if both session attempts failed, `driver` is unbound (first URL) or refers to the previous, already-quit session — confirm this cannot raise
+            print(i,'error',';broken_time:',bok)
+            time.sleep(10)
+portnum=random.randint(4555,4666)  # host port for the Selenium container, both bounds inclusive
+print(portnum)
+os.system('docker container stop p8809')  # stop and remove any earlier container with this name before starting a new one
+time.sleep(5)
+os.system('docker container rm p8809')
+time.sleep(5)
+os.system('docker run -d -p '+str(portnum)+':4444 --name p8809 --dns 168.95.1.1 selenium/standalone-chrome:106.0')  # publish container port 4444 on the chosen host port
+count=0  # successful clicks since the last restart
+bok=0  # container restarts triggered inside run_once()
+time.sleep(5)
+while True:  # no exit condition: the URL list is replayed until the process is stopped or run_once() raises
+    # run_once()
+    # time.sleep(10)
+
+    run_once()
+# elmts=driver.find_elements("xpath",'//*[@id="web"]/ol/li/div/div[1]/h3/a')
+# domain = 'hhh.com.tw'
+# idx=1
+# ranking=-1
+# domain_in_link = 0
+# print (len(elmts))
+# # driver.save_screenshot('c:/tmp/test.png')
+# n=0
+# for el in elmts:
+#     n+=1
+#     href=el.get_attribute('href')
+#     txt=el.text
+#     # print(txt)
+#     if len(txt)>10:
+#         if domain in href:
+#             domain_in_link += 1
+#             print('clicked....')
+#             print('href:',href)
+#             print('txt:',txt)
+# elmt_next = driver.find_element(By.XPATH, '//*[@id="left"]/div/ol/li[1]/div/div/a')
+
+# webdriver.ActionChains(driver).move_to_element(elmt_next).perform()
+# webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()
+# time.sleep(2)
+# elmts=driver.find_elements("xpath",'//*[@id="web"]/ol/li/div/div[1]/h3/a')
+# domain = 'hhh.com.tw'
+# idx=1
+# ranking=-1
+# domain_in_link = 0
+# print (len(elmts))
+# # driver.save_screenshot('c:/tmp/test.png')
+# n=0
+# for el in elmts:
+#     n+=1
+#     href=el.get_attribute('href')
+#     txt=el.text
+#     # print(txt)
+#     if len(txt)>10:
+#         if domain in href:
+#             domain_in_link += 1
+#             print('clicked....')
+#             print('href:',href)
+#             print('txt:',txt)
+# elmt_next = driver.find_element(By.XPATH, '//*[@id="left"]/div/ol/li[1]/div/div/a[2]')
+
+# webdriver.ActionChains(driver).move_to_element(elmt_next).perform()
+# webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()
+# time.sleep(5)
+# for i in range(20):
+#     try:
+#         elmt_next = driver.find_element(By.XPATH, '//*[@id="left"]/div/ol/li[1]/div/div/a[2]')
+
+#         webdriver.ActionChains(driver).move_to_element(elmt_next).perform()
+#         webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()
+#         time.sleep(5)
+#     except:
+#         time.sleep(200)
+#                 webdriver.ActionChains(driver).move_to_element(el).click().perform()
+# add_tabs = [7,9,11,13,15,7,9,11,13,15,7,9,11,13,15,7,9,11,13,15]
+
+# db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+# driver=None
+# headers = {
+#         "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
+#         "Content-Type": "application/x-www-form-urlencoded"
+# }
+
+# sleepoffset = 0
+
+# def send_msg(kw):
+#     params = {"message": "處理關鍵字: "+kw}  
+#     r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
+
+# def empty_query(q):
+#     global driver
+#     googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
+#     driver.get(googleurl)
+#     time.sleep(3)
+
+# def process_query(domain, target_domain, brands, query):
+#     print(query)
+#     sleepoffset = 0
+#     global driver
+#     if query == "艾立思" and "index" in target_domain:
+#         driver.get('https://www.google.com/search?num=100&q=艾立思&rlz=1C1ONGR_zh-TWTW997TW997&ei=zjdUY_DBG9Lm-Abpgq84&start=0&sa=N&filter=0&ved=2ahUKEwjw4KeEvfT6AhVSM94KHWnBCwcQ8tMDegQIARAQ&cshid=1666463754367857&biw=1368&bih=761&dpr=2')
+#         time.sleep(4)
+#     else:
+#         driver.get('https://www.google.com?num=100')
+#         time.sleep(3)
+#         print(driver.current_url)
+
+#         # elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
+#         # ABOVE METHOD IS DEPRECATED STARTING SELENIUM 4.3.0, USE THIS
+#         #
+#         elmt = driver.find_element(By.XPATH, "//input[@name='q']")
+#         time.sleep(1)
+
+#         elmt.send_keys(query)
+#         elmt.send_keys(Keys.ENTER)
+
+#     idx=1
+#     ranking=-1
+#     domain_in_link = 0
+
+#     googleurl = driver.current_url
+#     print(driver.current_url)
+
+#     if "sorry" in googleurl:
+#         return 444
+
+#     elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/a")
+
+
+#     print (len(elmts))
+#     # driver.save_screenshot('c:/tmp/test.png')
+#     n=0
+#     for el in elmts:
+#         n+=1
+#         href=el.get_attribute('href')
+#         txt=el.text
+#         if len(txt)>10:
+#             if domain in href:
+#                 domain_in_link += 1
+#                 print('clicked....')
+#                 print(href)
+#                 print(txt)
+
+#                 if query == "艾立思" and "index" in target_domain and href != "https://hhh.com.tw/brand-index.php?brand_id=211":
+#                     print("wrong site")
+#                     continue
+                
+#                 webdriver.ActionChains(driver).move_to_element(el).perform()
+#                 webdriver.ActionChains(driver).move_to_element(el).click().perform()
+#                 print("Rank: " + str(n))
+#                 time.sleep(15)
+
+#                 ''' unused
+#                 new_windows_count = add_tabs[random.randint(0,19)]
+#                 print(str(new_windows_count) + " new tabs")
+#                 for i in range (0,new_windows_count):
+#                     print("Tab " + str(i+1))
+#                     #original_window = driver.current_window_handle
+#                     #driver.switch_to.new_window('window')
+#                     #driver.get(href)
+#                     sleepoffset += 12
+#                     driver.execute_script('window.open("'+href+'","_blank");')
+#                     driver.execute_script("window.scrollTo(0, 600)")
+#                     time.sleep(15)
+#                     #driver.close()
+#                     #driver.switch_to.window(original_window)
+                
+#                 if domain in target_domain:
+#                     print("Target link found")
+#                     time_stamp = datetime.fromtimestamp(time.time())
+#                     time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M:%S")
+#                     db['click_results'].insert({"time_stamp": time_stamp, "brand": brands[domain], "domain": domain, "query": query, "url": href, "content": txt, "extra_windows": '0'})
+#                 '''
+#                 break
+
+#     '''if domain in target_domain:
+#         print("Target domain found")
+#         time_stamp = datetime.fromtimestamp(time.time())
+#         time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M:%S")
+#         db['query_results'].insert({"time_stamp": time_stamp, "brand": brands[domain], "domain": domain, "query": query, "googleurl": googleurl, "element_count": len(elmts), "domain_in_link_count": domain_in_link})
+#     '''        
+
+#     print(domain_in_link)
+#     return 200
+    
+
+# def run_once(domain, target_domain, brands, query):
+#     global driver
+#     result=[]
+#     options = webdriver.ChromeOptions()
+#     options.add_argument('--headless')
+# #    options.add_argument("--user-agent=" +user_agent)
+#     options.add_argument("--incognito")
+#     options.add_argument('--no-sandbox')
+#     options.add_argument('--disable-dev-shm-usage')
+
+#     driver = webdriver.Chrome(
+#     options=options)
+
+#     driver.delete_all_cookies()
+#     driver.set_window_size(1400,1000)
+
+#     statuscode = process_query(domain, target_domain, brands, query)
+#     driver.quit()
+
+#     return statuscode
+
+# #execution starts here
+
+# def execute(domain, target_domain, brands, query_list):
+#     print("Ctrl+C or Ctrl+Z to stop.")
+#     statuscode = 0
+#     st = timeit.default_timer()
+#     try:
+#         statuscode = run_once(domain, target_domain, brands, random.choice(query_list))
+#     except:
+#         traceback.print_exc()
+#     timetaken = timeit.default_timer()-st
+#     print("Time taken: " + str(timetaken))
+    
+#     print("Process returned with " + str(statuscode))
+#     if statuscode == 444:
+#         print("You have been caught!!!")
+        
+#         #notify("Clickbot " + brands[domain] + " has been caught by Google and will terminate. IP: ")
+
+#     extrasleep = 0
+#     if(timetaken < 50):
+#         extrasleep = 50 - timetaken
+#     print("Ctrl+C or Ctrl+Z to stop now.")
+#     print("You have " + str(10 + extrasleep) + " seconds.")
+#     time.sleep(10 + extrasleep)
+#     return statuscode

+ 9 - 5
website_clickjobs/type-1/hhh_gather.py

@@ -24,12 +24,13 @@ def process_query(target):
     global driver
     global driver
     try:
     try:
         driver.get(target) 
         driver.get(target) 
-        name=driver.find_element(By.CLASS_NAME,"infoCard__name")
+        #name=driver.find_element(By.CLASS_NAME,"infoCard__name") v2 disabled
         org=driver.find_element(By.CLASS_NAME,"infoCard__company")
         org=driver.find_element(By.CLASS_NAME,"infoCard__company")
     except:
     except:
         pass
         pass
     time.sleep(10)
     time.sleep(10)
-    return name.text,org.text
+    #return name.text,org.text
+    return org.text
 
 
 
 
 def run_once(target):
 def run_once(target):
@@ -55,16 +56,19 @@ def run_once(target):
         try:
         try:
             name="nn"
             name="nn"
             org="nn"
             org="nn"
-            name,org=process_query(t)
+            #name,org=process_query(t)
+            org=process_query(t)
             print(name)
             print(name)
             print(org)
             print(org)
-            newstr = t + ',' + name + ',' + org
+            #newstr = t + ',' + name + ',' + org
+            newstr = t + ',' + org
             alt1 = org[:2]
             alt1 = org[:2]
             alt2 = alt1 + org[-2:]
             alt2 = alt1 + org[-2:]
             alt3 = alt1 + org[-4:]
             alt3 = alt1 + org[-4:]
         except:
         except:
             print("error")
             print("error")
-        data.append([t,name,org,alt1,alt2,alt3])
+        #data.append([t,name,org,alt1,alt2,alt3])
+        data.append([t,org])
         time.sleep(3)
         time.sleep(3)
     driver.quit()
     driver.quit()
 
 

文件差异内容过多而无法显示
+ 0 - 0
website_clickjobs/type-1/hhh_r.py


文件差异内容过多而无法显示
+ 24 - 0
website_clickjobs/type-1/hhh_r2.py


+ 1 - 1
website_clickjobs/type-FD-N/yogoclean.py

@@ -5,6 +5,6 @@ domain = 'https://www.yogoclean.com'
 target_domain = ['yogoclean.com']
 target_domain = ['yogoclean.com']
 brands={domain:'有夠讚'}
 brands={domain:'有夠讚'}
 query_list = ('有夠讚','有夠讚')
 query_list = ('有夠讚','有夠讚')
-sleepinterval = 1
+sleepinterval = 3
 
 
 execute(domain, target_domain, brands, query_list, sleepinterval)
 execute(domain, target_domain, brands, query_list, sleepinterval)

+ 4 - 2
website_clickjobs/type-Y/_execute.py

@@ -14,7 +14,7 @@ from random import randint
 #from setting import rua
 #from setting import rua
 
 
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
-table=db['nda_log']
+table=db['general_log']
 path = 'C:/ChromeDriver' #pls adjust
 path = 'C:/ChromeDriver' #pls adjust
 path_z = 'C:/ChromeDriver' #pls adjust
 path_z = 'C:/ChromeDriver' #pls adjust
 
 
@@ -30,10 +30,12 @@ def restart_browser():
 
 
 resultdict={'搜尋詞':[],'網域':[],'結果標題':[],'結果網址':[],'結果名次':[]}
 resultdict={'搜尋詞':[],'網域':[],'結果標題':[],'結果網址':[],'結果名次':[]}
 
 
-def process_one(term, target, n, sr):
+def process_one(term, target, n, confidential):
     try:
     try:
         print(term)
         print(term)
 
 
+        table=db['nda_log'] if confidential == 1 else db['general_log']
+
         driver=restart_browser()
         driver=restart_browser()
         # escaped_search_term=urllib.parse.quote(term)
         # escaped_search_term=urllib.parse.quote(term)
         yturl = 'https://www.youtube.com/results?search_query=' + term
         yturl = 'https://www.youtube.com/results?search_query=' + term

+ 3 - 1
website_clickjobs/type-Y/c1.py

@@ -17,6 +17,8 @@ def pickvideo():
         whitelist = ["https://www.youtube.com/watch?v=7UGiKEc2JEU","https://www.youtube.com/watch?v=By63yTOiPFQ","https://www.youtube.com/watch?v=QzyTD949cVk","https://www.youtube.com/watch?v=WC_rYXVP2g8","https://www.youtube.com/watch?v=9lJ3gnqMT4o","https://www.youtube.com/watch?v=iA4__EcJE5I","https://www.youtube.com/watch?v=fx4fTBh9PFo","https://www.youtube.com/watch?v=WC_rYXVP2g8","https://www.youtube.com/watch?v=juJTjzi4DV0","https://www.youtube.com/watch?v=D9A8S1XaPnA","https://www.youtube.com/watch?v=tbdpEt65LRI","https://www.youtube.com/watch?v=Z4mts-HrBvU","https://www.youtube.com/watch?v=Hi-IY6R7_10","https://www.youtube.com/watch?v=e2jILHgLW10"]
         whitelist = ["https://www.youtube.com/watch?v=7UGiKEc2JEU","https://www.youtube.com/watch?v=By63yTOiPFQ","https://www.youtube.com/watch?v=QzyTD949cVk","https://www.youtube.com/watch?v=WC_rYXVP2g8","https://www.youtube.com/watch?v=9lJ3gnqMT4o","https://www.youtube.com/watch?v=iA4__EcJE5I","https://www.youtube.com/watch?v=fx4fTBh9PFo","https://www.youtube.com/watch?v=WC_rYXVP2g8","https://www.youtube.com/watch?v=juJTjzi4DV0","https://www.youtube.com/watch?v=D9A8S1XaPnA","https://www.youtube.com/watch?v=tbdpEt65LRI","https://www.youtube.com/watch?v=Z4mts-HrBvU","https://www.youtube.com/watch?v=Hi-IY6R7_10","https://www.youtube.com/watch?v=e2jILHgLW10"]
     return term, random.choice(whitelist)
     return term, random.choice(whitelist)
 
 
+is_confidential = 1 #set to 1 if true
+
 termlist = ["信義房屋","信義 房屋","信義房仲","信義 房仲"]
 termlist = ["信義房屋","信義 房屋","信義房仲","信義 房仲"]
 
 
 clickvideo = 2 # set to 1 to alert if video is found (DOES NOT CLICK!), requires whitelist
 clickvideo = 2 # set to 1 to alert if video is found (DOES NOT CLICK!), requires whitelist
@@ -26,5 +28,5 @@ clickvideo = 2 # set to 1 to alert if video is found (DOES NOT CLICK!), requires
 if clickvideo == 1 or clickvideo == 2:
 if clickvideo == 1 or clickvideo == 2:
     while True:
     while True:
         kw, target = pickvideo()
         kw, target = pickvideo()
-        process_one(kw, target, clickvideo, 0)
+        process_one(kw, target, clickvideo, is_confidential)
         time.sleep(10)
         time.sleep(10)

+ 25 - 0
website_clickjobs/type-Y/drhuang.py

@@ -0,0 +1,25 @@
+from _execute import *
+import random
+
+def pickvideo():  # Return a (search term, video URL) pair; the URL is chosen at random from the whitelist.
+    n = 0
+    if n==0:  # always true, because n is set to 0 on the line above
+        term = "台北牙周病醫生"
+        whitelist = ["https://www.youtube.com/watch?v=eFamkpqbVLM","https://www.youtube.com/watch?v=aC2SIWzqvKY"]
+    return term, random.choice(whitelist)
+
+is_confidential = 0 #set to 1 if true; passed as the last argument of process_one()
+
+termlist = ["信義房屋","信義 房屋","信義房仲","信義 房仲"]  # not referenced again in this script
+
+clickvideo = 2 # set to 1 to alert if video is found (DOES NOT CLICK!), requires whitelist
+# ^ set to 2 to click the video when found, requires whitelist
+# ^ any other value will only search for videos based on termlist
+
+if clickvideo == 1 or clickvideo == 2:
+    while True:  # no exit condition
+        kw, target = pickvideo()
+        kw = kw + "推薦" if random.randint(0,3) < 2 else kw  # suffix is appended when the draw is 0 or 1 out of 0..3
+        kw = kw + " 黃湘雲" if random.randint(0,3) < 2 else kw  # independent second draw, same rule
+        process_one(kw, target, clickvideo, is_confidential)  # process_one comes from the star-import of _execute
+        time.sleep(10)  # presumably `time` is also provided by the star-import of _execute — confirm

部分文件因为文件数量过多而无法显示