| 
					
				 | 
			
			
				@@ -0,0 +1,138 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium import webdriver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import traceback 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import argparse 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#import schedule 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.common.keys import Keys 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from selenium.webdriver.common.by import By 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import pandas as pd 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from PIL import Image 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import dataset 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from datetime import datetime 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from random import randint 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#from setting import rua 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
# Connection to the "seo" MySQL database, opened as a side effect of importing
# this module.
# NOTE(review): the user name and password are hard-coded in this URL; consider
# loading the connection string from configuration or the environment instead.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
# Table that process_one() inserts a row into after clicking a matching result.
table=db['nda_log']
# ChromeDriver locations, to be adjusted per machine. Neither name is
# referenced by the code visible in this file.
path = 'C:/ChromeDriver' #pls adjust
path_z = 'C:/ChromeDriver' #pls adjust
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def restart_browser(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options = webdriver.ChromeOptions() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    #options.add_argument("user-agent=%s" % rua()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options.add_argument('--headless') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options.add_argument('--incognito') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    options.add_argument('--no-sandbox') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver=webdriver.Chrome(options=options) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver.set_window_size(950, 20000) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return driver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
# Dict of empty lists keyed by column name (search term, domain, result title,
# result URL, result rank). Not referenced by the functions visible in this file.
resultdict={'搜尋詞':[],'網域':[],'結果標題':[],'結果網址':[],'結果名次':[]}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def process_one(term, target, n, sr): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print(term) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        driver=restart_browser() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # escaped_search_term=urllib.parse.quote(term) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        yturl = 'https://www.youtube.com/results?search_query=' + term 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        driver.get(yturl) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        time.sleep(6) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        fname=term.replace(' ','_') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        df=pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        more=driver.find_elements(By.XPATH, "//div[@id='more']") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for m in more: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                morebtn = m.find_element(By.TAG_NAME, "yt-formatted-string") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                webdriver.ActionChains(driver).move_to_element(morebtn).perform() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                webdriver.ActionChains(driver).move_to_element(morebtn).click().perform() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            except: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                print("nope") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        #elmts=driver.find_elements(By.TAG_NAME,"ytd-video-renderer") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        elmts=driver.find_elements(By.XPATH, "//div[@class='text-wrapper style-scope ytd-video-renderer']") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        count = len(elmts) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print(count) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        time.sleep(3) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        cnt=1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'作者':[],'結果說明':[],'結果名次':[]} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+         
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        found=0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        clickelmt = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        resultttl = "X" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        resultlink = "X" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        resultrank = "X" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for elmt in elmts: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                dt=elmt.find_element(By.TAG_NAME,"h3") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                title=dt.text 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                href=dt.find_element(By.TAG_NAME,"a").get_attribute('href') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                author=elmt.find_element(By.ID, "channel-info").find_element(By.ID,"text-container") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                #info=elmt.find_element(By.XPATH, "//yt-formatted-string[@class='metadata-snippet-text style-scope ytd-video-renderer']").text 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                #print(info) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                datadict['搜尋詞'].append(term) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                datadict['結果標題'].append(title) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                datadict['結果網址'].append(href) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                datadict['作者'].append(author.text) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                #datadict['結果說明'].append(info) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                datadict['結果名次'].append(str(cnt)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if (n==1 or n==2) and target in href and found==0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    found=1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    clickelmt = elmt 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    resultrank = str(cnt) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    resultlink = href 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    resultttl = elmt.text 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    print("Target domain found") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    print(resultttl) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    print(resultlink) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    print(resultrank) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                cnt+=1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            except: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                print('href2 exception') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                traceback.print_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if len(datadict['結果標題'])<=0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print('None',term,) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # driver.quit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # sys.exit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if n==2 and found==1: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            webdriver.ActionChains(driver).move_to_element(clickelmt).perform() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print("Clicked") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            time_stamp = datetime.fromtimestamp(time.time()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M:%S") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            table.insert({'ranking':resultrank,'kw':term,'results':count,'url':resultlink,'title':resultttl,'dt':time_stamp}) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            time.sleep(30) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        driver.quit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        print('completed') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        #time.sleep(randint(20,40)) #adjustable 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    except: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        traceback.print_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    #db_local.close() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def execute(termlist): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    for term in termlist: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        process_one(term) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        parser = argparse.ArgumentParser() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        parser.add_argument('--loop',action="store_true") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        args = parser.parse_args() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+# if args.loop: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     schedule.every(0.4).minutes.do(process_one) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     # print('今天開始') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     # schedule.every().day.at('9:30').do(process_one) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+# 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     while True: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#         schedule.run_pending() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#         time.sleep(1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 |