2 年前 · fdc96bae52
--- a/oak_u/etoday_use.py
+++ b/oak_u/etoday_use.py
@@ -0,0 +1,287 @@
 
				+#import redis
			
 
				+import time
			
 
				+import traceback
			
 
				+#import json
			
 
				+from selenium import webdriver
			
 
				+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
			
 
				+import time
			
 
				+import urllib
			
 
				+import os
			
 
				+from selenium.webdriver.support.ui import WebDriverWait
			
 
				+from selenium.webdriver.common.by import By
			
 
				+from selenium.webdriver.support import expected_conditions as EC
			
 
				+import dataset
			
 
				+from selenium.webdriver.common.keys import Keys
			
 
				+import json
			
 
				+import random
			
 
				+import time
			
 
				+import redis
			
 
				+import sys
			
 
				+import codecs
			
 
				+import random
			
 
				+import os
			
 
				+import time
			
 
				+import requests
			
 
				+import datetime
			
 
				+driver=None
			
 
				+from fake_useragent import UserAgent
			
 
				+
			
 
				+ua = UserAgent()
			
 
				+#proxy_enabled=True
			
 
				+# proxy_enabled=False
			
 
				+
			
 
				+# # https://youtu.be/cR2M5Khgxvc
			
 
				+
			
 
				+# db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
			
 
				+# glog_table=db['general_log']
			
 
				+
			
 
				+def re_get_webdriver():
				+    """Open a new remote Chrome session and store it in the global ``driver``.
				+
				+    Connects to the Selenium server at ``http://127.0.0.1:<portnum>/wd/hub``;
				+    ``portnum`` is a module-level global that must be set before the call.
				+    The session runs headless, incognito and without the Chrome sandbox, and
				+    uses a random user-agent string taken from ``ua.random``.
				+
				+    Returns:
				+        The newly created ``webdriver.Remote`` instance (also bound to the
				+        module-level ``driver``).
				+
				+    Raises:
				+        Any exception raised by ``webdriver.Remote`` when the session cannot
				+        be created. Nothing is caught here; run_once() relies on that to
				+        decide when the container has to be restarted.
				+    """
				+    global driver
				+    options = webdriver.ChromeOptions()
				+    options.add_argument("--user-agent=" + ua.random)
				+    for flag in ("--no-sandbox", "--headless", "--incognito"):
				+        options.add_argument(flag)
				+    # portnum is only read here, so no ``global`` declaration is needed.
				+    endpoint = 'http://127.0.0.1:' + str(portnum) + '/wd/hub'
				+    driver = webdriver.Remote(command_executor=endpoint, options=options)
				+    return driver
			
 
				+    # try:
			
 
				+    #     options = webdriver.ChromeOptions()
			
 
				+        
			
 
				+    #     options.add_argument("--no-sandbox")
			
 
				+    #     options.add_argument("--headless")
			
 
				+    #     options.add_argument("--incognito")
			
 
				+    #     # if proxy_enabled:
			
 
				+    #     #     options.add_argument('--proxy-server=socks5://172.104.92.245:14900')
			
 
				+
			
 
				+    #     try:
			
 
				+    #         driver = webdriver.Remote(
			
 
				+    #             command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
			
 
				+    #         options=options)
			
 
				+    #     except:
			
 
				+    #         traceback.print_exc()
			
 
				+    #         return None
			
 
				+    #     return driver
			
 
				+    # except:
			
 
				+    #     traceback.print_exc()
			
 
				+    #     driver=None
			
 
				+    #     return None
			
 
				+    # return driver
			
 
				+
			
 
				+
			
 
				+def run_once():
				+    """Visit every URL in the list once and click the target link on each page.
				+
				+    For each URL a fresh browser session is requested from
				+    re_get_webdriver(). If that fails, the docker container ``p8816`` is
				+    stopped, removed and started again on a new random host port (``bok`` is
				+    incremented and ``count`` reset to 0) and the session is requested a
				+    second time. Any failure while handling one URL is printed and the loop
				+    continues with the next URL. The browser session is always closed
				+    before moving on.
				+
				+    Reads and writes the module-level globals ``count``, ``portnum`` and
				+    ``bok``. Returns None.
				+    """
				+    global count
				+    global portnum
				+    global bok
				+
				+    ettoday_url_list = ['https://house.ettoday.net/news/1492047',
				+        'https://house.ettoday.net/news/1492167',
				+        'https://house.ettoday.net/news/1492288',
				+        'https://house.ettoday.net/news/1492178',
				+        'https://house.ettoday.net/news/1492229',
				+        'https://house.ettoday.net/news/1492134',
				+        'https://house.ettoday.net/news/1492240',
				+        'https://house.ettoday.net/news/1492161',
				+        'https://house.ettoday.net/news/1492168',
				+        'https://house.ettoday.net/news/1492217']
				+    link_xpath = '//*[@id="house"]/div[3]/div[2]/div[6]/div/div/div[1]/article/div/div[3]/p[1]/a'
				+
				+    for i in ettoday_url_list:
				+        # Reset per URL so the cleanup below never touches a stale or unbound session.
				+        driver = None
				+        try:
				+            try:
				+                driver = re_get_webdriver()
				+            except Exception:
				+                # The Selenium server is unreachable: recreate the container on a new port.
				+                portnum = random.randint(4555, 4666)
				+                print(portnum)
				+                os.system('docker container stop p8816')
				+                time.sleep(5)
				+                os.system('docker container rm p8816')
				+                time.sleep(5)
				+                os.system('docker run -d -p '+str(portnum)+':4444 --name p8816 --dns 168.95.1.1 selenium/standalone-chrome:101.0')
				+                bok += 1
				+                count = 0
				+                time.sleep(5)
				+                # If the container is still not ready this raises into the outer
				+                # handler, which skips this URL instead of ending the whole script.
				+                driver = re_get_webdriver()
				+            time.sleep(3)
				+            driver.get(i)
				+            time.sleep(3)
				+            elmt_next = driver.find_element(By.XPATH, link_xpath)
				+            # Move onto the link first, pause, then click it.
				+            webdriver.ActionChains(driver).move_to_element(elmt_next).perform()
				+            time.sleep(3)
				+            webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()
				+            print("cick!", i)
				+            count += 1
				+            print("count_time:", count, ';borken_time:', bok)
				+            time.sleep(random.randint(3, 7))
				+        except Exception:
				+            # ``Exception`` rather than a bare except so Ctrl+C still stops the script.
				+            traceback.print_exc()
				+            print("wrong", i, ';borken_time:', bok)
				+            time.sleep(5)
				+        finally:
				+            if driver is not None:
				+                try:
				+                    driver.quit()
				+                except Exception:
				+                    # quit() itself fails when the session or container is already gone;
				+                    # that must not abort the remaining URLs.
				+                    traceback.print_exc()
			
 
				+    # except:
			
 
				+    #     print('wrong for:',i)
			
 
				+        # kw=jsobj['kw']
			
 
				+        # if jsobj.get('domain') is None:
			
 
				+        #     exclude=jsobj['exclude']
			
 
				+        #     domain=None
			
 
				+        # else:
			
 
				+        #     domain=jsobj['domain']
			
 
				+        #     exclude=None
			
 
				+#         driver.get('https://www.google.com?num=100')
			
 
				+#         time.sleep(17)
			
 
				+#         while True:
			
 
				+#             try:
			
 
				+#                 print(driver.current_url)
			
 
				+#                 break
			
 
				+#             except:
			
 
				+#                 traceback.print_exc()
			
 
				+#                 driver=re_get_webdriver()
			
 
				+#                 time.sleep(3)
			
 
				+#                 driver.get('https://www.google.com?num=100')
			
 
				+#                 time.sleep(3)
			
 
				+
			
 
				+#             time.sleep(3)
			
 
				+
			
 
				+#         elmt = driver.find_element(By.XPATH, "//input[@name='q']")
			
 
				+#         time.sleep(1)
			
 
				+#         elmt.send_keys(kw)
			
 
				+#         elmt.send_keys(Keys.ENTER)
			
 
				+#         time.sleep(6)
			
 
				+
			
 
				+#         elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
			
 
				+
			
 
				+#         numresults=len(elmts)
			
 
				+# #        time.sleep(9999)
			
 
				+#         print('搜尋結果數量',numresults)
			
 
				+#         if numresults==0:
			
 
				+#             print(driver.current_url)
			
 
				+#             print(driver.title)
			
 
				+#             sys.exit()
			
 
				+
			
 
				+#         idx=1
			
 
				+#         found=False
			
 
				+#         test_lst=[]
			
 
				+#         for elmt in elmts:
			
 
				+#             href=elmt.get_attribute('href')
			
 
				+#             txt=elmt.text
			
 
				+#             if len(txt)>10:
			
 
				+#                 if domain is not None:
			
 
				+#                     for d in domain:
			
 
				+#                         if d in href:
			
 
				+#                             print('found....')
			
 
				+#                             print('clicked....')
			
 
				+#                             print(href)
			
 
				+#                             print(txt)
			
 
				+#                             print("ranking", idx)
			
 
				+#                             found=True
			
 
				+
			
 
				+#                             webdriver.ActionChains(driver).move_to_element(elmt).perform()
			
 
				+# #                            elmt.click()
			
 
				+#                             webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
			
 
				+#                             table.insert({'kw':kw,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now(),'result':numresults,'client':'64G'})
			
 
				+#                             time.sleep(6)
			
 
				+
			
 
				+#                             return
			
 
				+#                 else:
			
 
				+#                     if exclude not in href:
			
 
				+#                         test_lst.append(elmt)
			
 
				+                    
			
 
				+#             idx+=1
			
 
				+#         if exclude is not None:
			
 
				+#             print('exclude')
			
 
				+#             elmt=random.choice(test_lst)
			
 
				+#             print(elmt)
			
 
				+
			
 
				+#             webdriver.ActionChains(driver).move_to_element(elmt).perform()
			
 
				+#             webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
			
 
				+#             time.sleep(5)
			
 
				+
			
 
				+#         if not found:
			
 
				+#             table.insert({'ranking':-1,'kw':kw,'results':numresults,'url':'','title':'未收錄'})
			
 
				+
			
 
				+#     except:
			
 
				+#         traceback.print_exc()
			
 
				+
			
 
				+#         print('exception')
			
 
				+        # traceback.print_exc()
			
 
				+        
			
 
				+
			
 
				+
			
 
				+# time.sleep(5)
			
 
				+
			
 
				+# r=random.randint(0,27)
			
 
				+# r=26
			
 
				+
			
 
				+# cursor=db.query('select json from seo_jobs where cust="KNIGHT" and plan="形象SEO" order by rand() limit 1')
			
 
				+# for c in cursor:
			
 
				+#     js=json.loads(c['json'])
			
 
				+#     prefix=js['prefix']
			
 
				+#     postfix=js['postfix']
			
 
				+#     domain=js['domain'][0]
			
 
				+#     positive=js['positive']
			
 
				+#     rnd=js['rnd']
			
 
				+
			
 
				+
			
 
				+# Script entry: choose a random host port for the Selenium container and print it.
			
 
				+portnum=random.randint(4555,4666)
			
 
				+print(portnum)
			
 
				+# Stop and remove any existing container named p8816; the fixed sleeps separate the docker commands.
			
 
				+os.system('docker container stop p8816')
			
 
				+time.sleep(5)
			
 
				+os.system('docker container rm p8816')
			
 
				+time.sleep(5)
			
 
				+# Start selenium/standalone-chrome:101.0 detached, publishing container port 4444 on the chosen host port.
			
 
				+os.system('docker run -d -p '+str(portnum)+':4444 --name p8816 --dns 168.95.1.1 selenium/standalone-chrome:101.0')
			
 
				+# bok: incremented by run_once() each time it restarts the container.
			
 
				+bok = 0
			
 
				+# count: incremented by run_once() after each click, reset to 0 on a container restart.
			
 
				+count=0
			
 
				+time.sleep(5)
			
 
				+# Repeat the pass over all URLs until the process is interrupted or an exception escapes run_once().
			
 
				+while True:
			
 
				+    # run_once()
			
 
				+    # time.sleep(10)
			
 
				+
			
 
				+    run_once()
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# kw=random.choice(positive)
			
 
				+# kw2=random.choice(rnd)
			
 
				+
			
 
				+# count=0
			
 
				+# while True:
			
 
				+#     try:
			
 
				+#         run_once({'domain':domain,'kw':prefix+" "+kw+" "+kw2})
			
 
				+#         count+=1
			
 
				+#     except:
			
 
				+#         continue
			
 
				+#     print('中場休息 次數',count)
			
 
				+#     time.sleep(random.randint(120,150))
			
--- a/oak_u/readme.md
+++ b/oak_u/readme.md
@@ -0,0 +1 @@
 
				+記得要修改 Chrome 版本和 Docker 容器名稱,才能同時執行多個;另外,雖然我有設定自動重啟,但執行時間長了還是會發生錯誤,必須重啟(暫時還不知道原因)。
			
--- a/oak_u/yahoo_use.py
+++ b/oak_u/yahoo_use.py
@@ -0,0 +1,377 @@
 
				+import time
			
 
				+from datetime import datetime
			
 
				+import json
			
 
				+from selenium import webdriver
			
 
				+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
			
 
				+import time
			
 
				+import os
			
 
				+import urllib.parse
			
 
				+from selenium.webdriver.support.ui import WebDriverWait
			
 
				+from selenium.webdriver.common.by import By
			
 
				+from selenium.webdriver.support import expected_conditions as EC
			
 
				+import codecs
			
 
				+import random
			
 
				+import requests
			
 
				+import dataset
			
 
				+import traceback
			
 
				+import sys
			
 
				+from selenium.webdriver.common.keys import Keys
			
 
				+import timeit
			
 
				+import socket
			
 
				+
			
 
				+import random 
			
 
				+import re
			
 
				+
			
 
				+
			
 
				+# import requests
			
 
				+
			
 
				+
			
 
				+
			
 
				+from fake_useragent import UserAgent
			
 
				+
			
 
				+ua = UserAgent()
			
 
				+def re_get_webdriver():
				+    """Open a new remote Chrome session and store it in the global ``driver``.
				+
				+    Connects to the Selenium server at ``http://127.0.0.1:<portnum>/wd/hub``;
				+    ``portnum`` is a module-level global that must be set before the call.
				+    The session runs headless, incognito and without the Chrome sandbox, and
				+    uses a random user-agent string taken from ``ua.random``.
				+
				+    Returns:
				+        The newly created ``webdriver.Remote`` instance (also bound to the
				+        module-level ``driver``).
				+
				+    Raises:
				+        Any exception raised by ``webdriver.Remote`` when the session cannot
				+        be created. Nothing is caught here; run_once() relies on that to
				+        decide when the container has to be restarted.
				+    """
				+    global driver
				+    options = webdriver.ChromeOptions()
				+    options.add_argument("--user-agent=" + ua.random)
				+    for flag in ("--no-sandbox", "--headless", "--incognito"):
				+        options.add_argument(flag)
				+    # portnum is only read here, so no ``global`` declaration is needed.
				+    endpoint = 'http://127.0.0.1:' + str(portnum) + '/wd/hub'
				+    driver = webdriver.Remote(command_executor=endpoint, options=options)
				+    return driver
			
 
				+# headers = {'user-agent': ua.chrome}
			
 
				+# r = requests.get('https://house.ettoday.net/news/1492047', headers=headers)
			
 
				+# print(r.text)
			
 
				+
			
 
				+
			
 
				+# options.binary_location = ('C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe')
			
 
				+# driverPath = './chromedriver.exe'
			
 
				+
			
 
				+# driver = webdriver.Firefox()
			
 
				+# driver.get('https://google.com')
			
 
				+
			
 
				+# ettoday_url_list = ['https://house.ettoday.net/news/1492047',
			
 
				+# 'https://house.ettoday.net/news/1492167',
			
 
				+# 'https://house.ettoday.net/news/1492288',
			
 
				+# 'https://house.ettoday.net/news/1492178',
			
 
				+# 'https://house.ettoday.net/news/1492229',
			
 
				+# 'https://house.ettoday.net/news/1492134',
			
 
				+# 'https://house.ettoday.net/news/1492240',
			
 
				+# 'https://house.ettoday.net/news/1492161',
			
 
				+# 'https://house.ettoday.net/news/1492168',
			
 
				+# 'https://house.ettoday.net/news/1492217']
			
 
				+# for i in ettoday_url_list:
			
 
				+    
			
 
				+#     driver.get(i)
			
 
				+#     time.sleep(3)
			
 
				+#     elmt_next = driver.find_element(By.XPATH, '//*[@id="house"]/div[3]/div[2]/div[6]/div/div/div[1]/article/div/div[3]/p[1]/a')
			
 
				+
			
 
				+#     webdriver.ActionChains(driver).move_to_element(elmt_next).perform()
			
 
				+#     webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()
			
 
				+#     print("cick:",i)
			
 
				+# # elmt = driver.find_element(By.XPATH, '//*[@id="yschsp"]')
			
 
				+#     time.sleep(random.randint(3,7))
			
 
				+#     driver.quit()
			
 
				+
			
 
				+# query='幸福空間'
			
 
				+# elmt.send_keys(query)
			
 
				+# elmt.send_keys(Keys.ENTER)
			
 
				+# time.sleep(1)
			
 
				+
			
 
				+# time.sleep(1)
			
 
				+
			
 
				+def run_once():
				+    """Visit every URL in the list once and click the target link on each page.
				+
				+    For each URL a fresh browser session is requested from
				+    re_get_webdriver(). If that fails, the docker container ``p8809`` is
				+    stopped, removed and started again on a new random host port (``bok`` is
				+    incremented and ``count`` reset to 0) and the session is requested a
				+    second time. Any failure while handling one URL is printed and the loop
				+    continues with the next URL. The browser session is always closed
				+    before moving on.
				+
				+    Reads and writes the module-level globals ``count``, ``bok`` and
				+    ``portnum``. Returns None.
				+    """
				+    global count
				+    global bok
				+    global portnum
				+    yahoo_url_list = [
				+    'https://house.yahoo.com.tw/%E9%9B%8D%E5%AE%B9%E9%9B%85%E7%B7%BB-%E5%84%AA%E9%9B%85%E5%81%87%E6%9C%9F-%E6%96%B0%E5%8F%A4%E5%85%B8-31%E5%9D%AA-020000499.html',
				+    'https://house.yahoo.com.tw/%E6%96%B0%E7%94%9F%E9%AD%85%E5%8A%9B-%E8%AD%9C%E5%AF%AB%E5%B9%B8%E7%A6%8F%E5%9C%93%E8%88%9E%E6%9B%B2-%E5%8C%97%E6%AD%90%E9%A2%A8-35%E5%9D%AA-020000759.html',
				+    'https://house.yahoo.com.tw/20%E5%B9%B4%E8%80%81%E5%AE%85%E9%87%8D%E7%94%9F-%E7%BE%8E%E5%BC%8F%E4%BD%8E%E5%A5%A2%E6%9C%89%E5%AE%B6%E7%9A%84%E6%BA%AB%E5%BA%A6-106%E5%9D%AA-020000087.html',
				+    'https://house.yahoo.com.tw/sheer-%E7%B4%94%E7%B2%B9-%E7%8F%BE%E4%BB%A3%E9%A2%A8-25%E5%9D%AA-020000325.html',
				+    'https://house.yahoo.com.tw/%E8%AE%8A%E5%BD%A2%E8%88%87%E7%B5%84%E5%90%88-%E8%A4%87%E5%90%88%E5%BC%8F%E7%9A%84%E7%A9%BA%E9%96%93%E8%A8%AD%E8%A8%88-%E4%B8%AD-020000869.html',
				+    'https://house.yahoo.com.tw/%E8%A7%A3%E6%94%BE%E6%8B%98%E7%A6%81%E5%BF%83%E9%9D%88-%E8%B6%85%E8%84%AB%E7%8B%82%E6%83%B3%E9%80%8F%E5%A4%A9%E5%8E%9D-020000093.html',
				+    'https://house.yahoo.com.tw/%E8%A6%AA%E5%AD%90%E6%96%99%E7%90%86%E7%9B%B4%E6%92%AD%E4%B8%BB%E7%9A%84%E5%AE%B6-%E5%BE%AE%E7%BE%8E%E5%BC%8F%E8%A8%AD%E8%A8%88-50%E5%9D%AA-020000607.html',
				+    'https://house.yahoo.com.tw/%E5%82%B3%E9%81%94%E6%B7%B1%E8%89%B2%E6%BA%AB%E5%BA%A6-%E8%8B%B1%E5%80%AB%E7%B4%B3%E5%A3%AB%E8%B2%B4%E6%97%8F%E9%A2%A8-%E7%8F%BE%E4%BB%A3%E5%A5%A2%E8%8F%AF%E9%A2%A8-020000334.html',
				+    'https://house.yahoo.com.tw/%E7%8E%A9%E5%91%B3%E7%B3%BB%E7%B5%B1%E6%9D%BF-%E5%BF%AB%E9%80%9F%E6%88%90%E5%AE%B6%E7%B0%A1%E7%B4%84%E7%8F%BE%E4%BB%A3%E9%A2%A8-35%E5%9D%AA-020000199.html',
				+    'https://house.yahoo.com.tw/%E4%BB%A5%E5%9C%93%E5%BD%A2%E7%AC%A6%E7%A2%BC-%E5%BD%A2%E5%A1%91%E6%81%A2%E5%BC%98%E5%A5%A2%E7%BE%8E%E8%87%BB%E9%82%B8-%E5%A5%A2%E8%8F%AF%E9%A2%A8-42%E5%9D%AA-020000780.html']
				+    link_xpath = '//*[@id="maincontainer"]/main/div/div[2]/div[1]/div[1]/div[1]/div[1]/div/div/div[1]/a'
				+
				+    for i in yahoo_url_list:
				+        # Reset per URL: ``driver`` is a local here, so without this the cleanup
				+        # could hit an unbound name or a session that was already quit.
				+        driver = None
				+        try:
				+            try:
				+                driver = re_get_webdriver()
				+            except Exception:
				+                # The Selenium server is unreachable: recreate the container on a new port.
				+                print('driver_bok')
				+                portnum = random.randint(4555, 4666)
				+                print(portnum)
				+                os.system('docker container stop p8809')
				+                time.sleep(5)
				+                os.system('docker container rm p8809')
				+                time.sleep(5)
				+                os.system('docker run -d -p '+str(portnum)+':4444 --name p8809 --dns 168.95.1.1 selenium/standalone-chrome:106.0')
				+                count = 0
				+                bok += 1
				+                time.sleep(5)
				+                driver = re_get_webdriver()
				+
				+            driver.get(i)
				+            time.sleep(5)
				+            elmt_next = driver.find_element(By.XPATH, link_xpath)
				+            # Move onto the link first, then click it.
				+            webdriver.ActionChains(driver).move_to_element(elmt_next).perform()
				+            webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()
				+            print("cick!")
				+            count += 1
				+            print('click_all_time:', count, ';broken_time:', bok)
				+            time.sleep(random.randint(3, 7))
				+        except Exception:
				+            # ``Exception`` rather than a bare except so Ctrl+C still stops the script.
				+            traceback.print_exc()
				+            print(i, 'error', ';broken_time:', bok)
				+            time.sleep(10)
				+        finally:
				+            if driver is not None:
				+                try:
				+                    driver.quit()
				+                except Exception:
				+                    # quit() itself fails when the session or container is already gone;
				+                    # that must not abort the remaining URLs.
				+                    traceback.print_exc()
			
 
				+# Script entry: choose a random host port for the Selenium container and print it.
			
 
				+portnum=random.randint(4555,4666)
			
 
				+print(portnum)
			
 
				+# Stop and remove any existing container named p8809; the fixed sleeps separate the docker commands.
			
 
				+os.system('docker container stop p8809')
			
 
				+time.sleep(5)
			
 
				+os.system('docker container rm p8809')
			
 
				+time.sleep(5)
			
 
				+# Start selenium/standalone-chrome:106.0 detached, publishing container port 4444 on the chosen host port.
			
 
				+os.system('docker run -d -p '+str(portnum)+':4444 --name p8809 --dns 168.95.1.1 selenium/standalone-chrome:106.0')
			
 
				+# count: incremented by run_once() after each click, reset to 0 on a container restart.
			
 
				+count=0
			
 
				+# bok: incremented by run_once() each time it restarts the container.
			
 
				+bok=0
			
 
				+time.sleep(5)
			
 
				+# Repeat the pass over all URLs until the process is interrupted or an exception escapes run_once().
			
 
				+while True:
			
 
				+    # run_once()
			
 
				+    # time.sleep(10)
			
 
				+
			
 
				+    run_once()
			
 
				+# elmts=driver.find_elements("xpath",'//*[@id="web"]/ol/li/div/div[1]/h3/a')
			
 
				+# domain = 'hhh.com.tw'
			
 
				+# idx=1
			
 
				+# ranking=-1
			
 
				+# domain_in_link = 0
			
 
				+# print (len(elmts))
			
 
				+# # driver.save_screenshot('c:/tmp/test.png')
			
 
				+# n=0
			
 
				+# for el in elmts:
			
 
				+#     n+=1
			
 
				+#     href=el.get_attribute('href')
			
 
				+#     txt=el.text
			
 
				+#     # print(txt)
			
 
				+#     if len(txt)>10:
			
 
				+#         if domain in href:
			
 
				+#             domain_in_link += 1
			
 
				+#             print('clicked....')
			
 
				+#             print('href:',href)
			
 
				+#             print('txt:',txt)
			
 
				+# elmt_next = driver.find_element(By.XPATH, '//*[@id="left"]/div/ol/li[1]/div/div/a')
			
 
				+
			
 
				+# webdriver.ActionChains(driver).move_to_element(elmt_next).perform()
			
 
				+# webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()
			
 
				+# time.sleep(2)
			
 
				+# elmts=driver.find_elements("xpath",'//*[@id="web"]/ol/li/div/div[1]/h3/a')
			
 
				+# domain = 'hhh.com.tw'
			
 
				+# idx=1
			
 
				+# ranking=-1
			
 
				+# domain_in_link = 0
			
 
				+# print (len(elmts))
			
 
				+# # driver.save_screenshot('c:/tmp/test.png')
			
 
				+# n=0
			
 
				+# for el in elmts:
			
 
				+#     n+=1
			
 
				+#     href=el.get_attribute('href')
			
 
				+#     txt=el.text
			
 
				+#     # print(txt)
			
 
				+#     if len(txt)>10:
			
 
				+#         if domain in href:
			
 
				+#             domain_in_link += 1
			
 
				+#             print('clicked....')
			
 
				+#             print('href:',href)
			
 
				+#             print('txt:',txt)
			
 
				+# elmt_next = driver.find_element(By.XPATH, '//*[@id="left"]/div/ol/li[1]/div/div/a[2]')
			
 
				+
			
 
				+# webdriver.ActionChains(driver).move_to_element(elmt_next).perform()
			
 
				+# webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()
			
 
				+# time.sleep(5)
			
 
				+# for i in range(20):
			
 
				+#     try:
			
 
				+#         elmt_next = driver.find_element(By.XPATH, '//*[@id="left"]/div/ol/li[1]/div/div/a[2]')
			
 
				+
			
 
				+#         webdriver.ActionChains(driver).move_to_element(elmt_next).perform()
			
 
				+#         webdriver.ActionChains(driver).move_to_element(elmt_next).click().perform()
			
 
				+#         time.sleep(5)
			
 
				+#     except:
			
 
				+#         time.sleep(200)
			
 
				+#                 webdriver.ActionChains(driver).move_to_element(el).click().perform()
			
 
				+# add_tabs = [7,9,11,13,15,7,9,11,13,15,7,9,11,13,15,7,9,11,13,15]
			
 
				+
			
 
				+# db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
			
 
				+# driver=None
			
 
				+# headers = {
			
 
				+#         "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
			
 
				+#         "Content-Type": "application/x-www-form-urlencoded"
			
 
				+# }
			
 
				+
			
 
				+# sleepoffset = 0
			
 
				+
			
 
				+# def send_msg(kw):
			
 
				+#     params = {"message": "處理關鍵字: "+kw}  
			
 
				+#     r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
			
 
				+
			
 
				+# def empty_query(q):
			
 
				+#     global driver
			
 
				+#     googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
			
 
				+#     driver.get(googleurl)
			
 
				+#     time.sleep(3)
			
 
				+
			
 
				+# def process_query(domain, target_domain, brands, query):
			
 
				+#     print(query)
			
 
				+#     sleepoffset = 0
			
 
				+#     global driver
			
 
				+#     if query == "艾立思" and "index" in target_domain:
			
 
				+#         driver.get('https://www.google.com/search?num=100&q=艾立思&rlz=1C1ONGR_zh-TWTW997TW997&ei=zjdUY_DBG9Lm-Abpgq84&start=0&sa=N&filter=0&ved=2ahUKEwjw4KeEvfT6AhVSM94KHWnBCwcQ8tMDegQIARAQ&cshid=1666463754367857&biw=1368&bih=761&dpr=2')
			
 
				+#         time.sleep(4)
			
 
				+#     else:
			
 
				+#         driver.get('https://www.google.com?num=100')
			
 
				+#         time.sleep(3)
			
 
				+#         print(driver.current_url)
			
 
				+
			
 
				+#         # elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
			
 
				+#         # ABOVE METHOD IS DEPRECATED STARTING SELENIUM 4.3.0, USE THIS
			
 
				+#         #
			
 
				+#         elmt = driver.find_element(By.XPATH, "//input[@name='q']")
			
 
				+#         time.sleep(1)
			
 
				+
			
 
				+#         elmt.send_keys(query)
			
 
				+#         elmt.send_keys(Keys.ENTER)
			
 
				+
			
 
				+#     idx=1
			
 
				+#     ranking=-1
			
 
				+#     domain_in_link = 0
			
 
				+
			
 
				+#     googleurl = driver.current_url
			
 
				+#     print(driver.current_url)
			
 
				+
			
 
				+#     if "sorry" in googleurl:
			
 
				+#         return 444
			
 
				+
			
 
				+#     elmts=driver.find_elements("xpath","//div[@class='yuRUbf']/a")
			
 
				+
			
 
				+
			
 
				+#     print (len(elmts))
			
 
				+#     # driver.save_screenshot('c:/tmp/test.png')
			
 
				+#     n=0
			
 
				+#     for el in elmts:
			
 
				+#         n+=1
			
 
				+#         href=el.get_attribute('href')
			
 
				+#         txt=el.text
			
 
				+#         if len(txt)>10:
			
 
				+#             if domain in href:
			
 
				+#                 domain_in_link += 1
			
 
				+#                 print('clicked....')
			
 
				+#                 print(href)
			
 
				+#                 print(txt)
			
 
				+
			
 
				+#                 if query == "艾立思" and "index" in target_domain and href != "https://hhh.com.tw/brand-index.php?brand_id=211":
			
 
				+#                     print("wrong site")
			
 
				+#                     continue
			
 
				+                
			
 
				+#                 webdriver.ActionChains(driver).move_to_element(el).perform()
			
 
				+#                 webdriver.ActionChains(driver).move_to_element(el).click().perform()
			
 
				+#                 print("Rank: " + str(n))
			
 
				+#                 time.sleep(15)
			
 
				+
			
 
				+#                 ''' unused
			
 
				+#                 new_windows_count = add_tabs[random.randint(0,19)]
			
 
				+#                 print(str(new_windows_count) + " new tabs")
			
 
				+#                 for i in range (0,new_windows_count):
			
 
				+#                     print("Tab " + str(i+1))
			
 
				+#                     #original_window = driver.current_window_handle
			
 
				+#                     #driver.switch_to.new_window('window')
			
 
				+#                     #driver.get(href)
			
 
				+#                     sleepoffset += 12
			
 
				+#                     driver.execute_script('window.open("'+href+'","_blank");')
			
 
				+#                     driver.execute_script("window.scrollTo(0, 600)")
			
 
				+#                     time.sleep(15)
			
 
				+#                     #driver.close()
			
 
				+#                     #driver.switch_to.window(original_window)
			
 
				+                
			
 
				+#                 if domain in target_domain:
			
 
				+#                     print("Target link found")
			
 
				+#                     time_stamp = datetime.fromtimestamp(time.time())
			
 
				+#                     time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M:%S")
			
 
				+#                     db['click_results'].insert({"time_stamp": time_stamp, "brand": brands[domain], "domain": domain, "query": query, "url": href, "content": txt, "extra_windows": '0'})
			
 
				+#                 '''
			
 
				+#                 break
			
 
				+
			
 
				+#     '''if domain in target_domain:
			
 
				+#         print("Target domain found")
			
 
				+#         time_stamp = datetime.fromtimestamp(time.time())
			
 
				+#         time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M:%S")
			
 
				+#         db['query_results'].insert({"time_stamp": time_stamp, "brand": brands[domain], "domain": domain, "query": query, "googleurl": googleurl, "element_count": len(elmts), "domain_in_link_count": domain_in_link})
			
 
				+#     '''        
			
 
				+
			
 
				+#     print(domain_in_link)
			
 
				+#     return 200
			
 
				+    
			
 
				+
			
 
				+# def run_once(domain, target_domain, brands, query):
			
 
				+#     global driver
			
 
				+#     result=[]
			
 
				+#     options = webdriver.ChromeOptions()
			
 
				+#     options.add_argument('--headless')
			
 
				+# #    options.add_argument("--user-agent=" +user_agent)
			
 
				+#     options.add_argument("--incognito")
			
 
				+#     options.add_argument('--no-sandbox')
			
 
				+#     options.add_argument('--disable-dev-shm-usage')
			
 
				+
			
 
				+#     driver = webdriver.Chrome(
			
 
				+#     options=options)
			
 
				+
			
 
				+#     driver.delete_all_cookies()
			
 
				+#     driver.set_window_size(1400,1000)
			
 
				+
			
 
				+#     statuscode = process_query(domain, target_domain, brands, query)
			
 
				+#     driver.quit()
			
 
				+
			
 
				+#     return statuscode
			
 
				+
			
 
				+# #execution starts here
			
 
				+
			
 
				+# def execute(domain, target_domain, brands, query_list):
			
 
				+#     print("Ctrl+C or Ctrl+Z to stop.")
			
 
				+#     statuscode = 0
			
 
				+#     st = timeit.default_timer()
			
 
				+#     try:
			
 
				+#         statuscode = run_once(domain, target_domain, brands, random.choice(query_list))
			
 
				+#     except:
			
 
				+#         traceback.print_exc()
			
 
				+#     timetaken = timeit.default_timer()-st
			
 
				+#     print("Time taken: " + str(timetaken))
			
 
				+    
			
 
				+#     print("Process returned with " + str(statuscode))
			
 
				+#     if statuscode == 444:
			
 
				+#         print("You have been caught!!!")
			
 
				+        
			
 
				+#         #notify("Clickbot " + brands[domain] + " has been caught by Google and will terminate. IP: ")
			
 
				+
			
 
				+#     extrasleep = 0
			
 
				+#     if(timetaken < 50):
			
 
				+#         extrasleep = 50 - timetaken
			
 
				+#     print("Ctrl+C or Ctrl+Z to stop now.")
			
 
				+#     print("You have " + str(10 + extrasleep) + " seconds.")
			
 
				+#     time.sleep(10 + extrasleep)
			
 
				+#     return statuscode
			
--- a/website_clickjobs/type-1/hhh_gather.py
+++ b/website_clickjobs/type-1/hhh_gather.py
@@ -24,12 +24,13 @@ def process_query(target):
 
				     global driver
			
 
				     try:
			
 
				         driver.get(target) 
			
 
				-        name=driver.find_element(By.CLASS_NAME,"infoCard__name")
			
 
				+        #name=driver.find_element(By.CLASS_NAME,"infoCard__name") v2 disabled
			
 
				         org=driver.find_element(By.CLASS_NAME,"infoCard__company")
			
 
				     except:
			
 
				         pass
			
 
				     time.sleep(10)
			
 
				-    return name.text,org.text
			
 
				+    #return name.text,org.text
			
 
				+    return org.text
			
 
				 
			
 
				 
			
 
				 def run_once(target):
			
@@ -55,16 +56,19 @@ def run_once(target):
 
				         try:
			
 
				             name="nn"
			
 
				             org="nn"
			
 
				-            name,org=process_query(t)
			
 
				+            #name,org=process_query(t)
			
 
				+            org=process_query(t)
			
 
				             print(name)
			
 
				             print(org)
			
 
				-            newstr = t + ',' + name + ',' + org
			
 
				+            #newstr = t + ',' + name + ',' + org
			
 
				+            newstr = t + ',' + org
			
 
				             alt1 = org[:2]
			
 
				             alt2 = alt1 + org[-2:]
			
 
				             alt3 = alt1 + org[-4:]
			
 
				         except:
			
 
				             print("error")
			
 
				-        data.append([t,name,org,alt1,alt2,alt3])
			
 
				+        #data.append([t,name,org,alt1,alt2,alt3])
			
 
				+        data.append([t,org])
			
 
				         time.sleep(3)
			
 
				     driver.quit()
			
 
				 
			
--- a/website_clickjobs/type-1/hhh_r.py
+++ b/website_clickjobs/type-1/hhh_r.py
--- a/website_clickjobs/type-1/hhh_r2.py
+++ b/website_clickjobs/type-1/hhh_r2.py
--- a/website_clickjobs/type-FD-N/yogoclean.py
+++ b/website_clickjobs/type-FD-N/yogoclean.py
@@ -5,6 +5,6 @@ domain = 'https://www.yogoclean.com'
 
				 target_domain = ['yogoclean.com']
			
 
				 brands={domain:'有夠讚'}
			
 
				 query_list = ('有夠讚','有夠讚')
			
 
				-sleepinterval = 1
			
 
				+sleepinterval = 3
			
 
				 
			
 
				 execute(domain, target_domain, brands, query_list, sleepinterval)
			
--- a/website_clickjobs/type-Y/_execute.py
+++ b/website_clickjobs/type-Y/_execute.py
@@ -14,7 +14,7 @@ from random import randint
 
				 #from setting import rua
			
 
				 
			
 
				 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
			
 
				-table=db['nda_log']
			
 
				+table=db['general_log']
			
 
				 path = 'C:/ChromeDriver' #pls adjust
			
 
				 path_z = 'C:/ChromeDriver' #pls adjust
			
 
				 
			
@@ -30,10 +30,12 @@ def restart_browser():
 
				 
			
 
				 resultdict={'搜尋詞':[],'網域':[],'結果標題':[],'結果網址':[],'結果名次':[]}
			
 
				 
			
 
				-def process_one(term, target, n, sr):
			
 
				+def process_one(term, target, n, confidential):
			
 
				     try:
			
 
				         print(term)
			
 
				 
			
 
				+        table=db['nda_log'] if confidential == 1 else db['general_log']
			
 
				+
			
 
				         driver=restart_browser()
			
 
				         # escaped_search_term=urllib.parse.quote(term)
			
 
				         yturl = 'https://www.youtube.com/results?search_query=' + term
			
--- a/website_clickjobs/type-Y/c1.py
+++ b/website_clickjobs/type-Y/c1.py
@@ -17,6 +17,8 @@ def pickvideo():
 
				         whitelist = ["https://www.youtube.com/watch?v=7UGiKEc2JEU","https://www.youtube.com/watch?v=By63yTOiPFQ","https://www.youtube.com/watch?v=QzyTD949cVk","https://www.youtube.com/watch?v=WC_rYXVP2g8","https://www.youtube.com/watch?v=9lJ3gnqMT4o","https://www.youtube.com/watch?v=iA4__EcJE5I","https://www.youtube.com/watch?v=fx4fTBh9PFo","https://www.youtube.com/watch?v=WC_rYXVP2g8","https://www.youtube.com/watch?v=juJTjzi4DV0","https://www.youtube.com/watch?v=D9A8S1XaPnA","https://www.youtube.com/watch?v=tbdpEt65LRI","https://www.youtube.com/watch?v=Z4mts-HrBvU","https://www.youtube.com/watch?v=Hi-IY6R7_10","https://www.youtube.com/watch?v=e2jILHgLW10"]
			
 
				     return term, random.choice(whitelist)
			
 
				 
			
 
				+is_confidential = 1 #set to 1 if true
			
 
				+
			
 
				 termlist = ["信義房屋","信義 房屋","信義房仲","信義 房仲"]
			
 
				 
			
 
				 clickvideo = 2 # set to 1 to alert if video is found (DOES NOT CLICK!), requires whitelist
			
@@ -26,5 +28,5 @@ clickvideo = 2 # set to 1 to alert if video is found (DOES NOT CLICK!), requires
 
				 if clickvideo == 1 or clickvideo == 2:
			
 
				     while True:
			
 
				         kw, target = pickvideo()
			
 
				-        process_one(kw, target, clickvideo, 0)
			
 
				+        process_one(kw, target, clickvideo, is_confidential)
			
 
				         time.sleep(10)
			
--- a/website_clickjobs/type-Y/drhuang.py
+++ b/website_clickjobs/type-Y/drhuang.py
@@ -0,0 +1,25 @@
 
				+from _execute import *
			
 
				+import random
			
 
				+
			
 
				+def pickvideo():  # returns a (search term, video URL) pair; the URL is picked at random from whitelist
			
 
				+    n = 0  # hard-coded selector; 0 is the only case handled below
			
 
				+    if n==0:
			
 
				+        term = "台北牙周病醫生"
			
 
				+        whitelist = ["https://www.youtube.com/watch?v=eFamkpqbVLM","https://www.youtube.com/watch?v=aC2SIWzqvKY"]
			
 
				+    return term, random.choice(whitelist)  # term/whitelist are only bound inside the n==0 branch above
			
 
				+
			
 
				+is_confidential = 0 # passed to process_one as its last argument; 1 makes it use db['nda_log'], anything else db['general_log']
			
 
				+
			
 
				+termlist = ["信義房屋","信義 房屋","信義房仲","信義 房仲"]  # NOTE(review): not referenced anywhere else in this script - confirm it is still needed
			
 
				+
			
 
				+clickvideo = 2 # set to 1 to alert if video is found (DOES NOT CLICK!), requires whitelist
			
 
				+# ^ set to 2 to click the video when found, requires whitelist
			
 
				+# ^ any other value is documented as "search only, based on termlist"; NOTE(review): this file has no such branch, so nothing runs - confirm
			
 
				+
			
 
				+if clickvideo == 1 or clickvideo == 2:
			
 
				+    while True:  # loops forever; the job only ends when the process is stopped
			
 
				+        kw, target = pickvideo()
			
 
				+        kw = kw + "推薦" if random.randint(0,3) < 2 else kw  # randint(0,3) is inclusive, so the suffix is appended for 0 and 1 (half the time)
			
 
				+        kw = kw + " 黃湘雲" if random.randint(0,3) < 2 else kw  # same 50% chance, independent of the suffix above
			
 
				+        process_one(kw, target, clickvideo, is_confidential)
			
 
				+        time.sleep(10)  # wait 10 seconds before the next search
		`@@ -0,0 +1 @@`
		`+記得要修改 Chrome 版本和 Docker 名稱,才能同時執行多個實例。另外,雖然我有設定重啟,但執行時間長了還是會發生錯誤,仍須重啟(暫時還不知道原因)。`