|
@@ -0,0 +1,138 @@
|
|
|
+from selenium import webdriver
|
|
|
+import traceback
|
|
|
+import time
|
|
|
+import argparse
|
|
|
+#import schedule
|
|
|
+from selenium.webdriver.common.keys import Keys
|
|
|
+from selenium.webdriver.common.by import By
|
|
|
+import pandas as pd
|
|
|
+from PIL import Image
|
|
|
+import dataset
|
|
|
+from datetime import datetime
|
|
|
+from random import randint
|
|
|
+
|
|
|
+#from setting import rua
|
|
|
+
|
|
|
# Shared database handle and the log table that process_one() writes to.
# NOTE(review): the connection string embeds a user name and password;
# consider loading it from configuration or the environment instead.
db = dataset.connect(
    'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'
)
table = db['nda_log']

# ChromeDriver location; both names point at the same folder and must be
# adjusted per machine.
# NOTE(review): neither name appears to be used by the functions in this
# script -- confirm before removing.
path = path_z = 'C:/ChromeDriver'
|
|
|
+
|
|
|
def restart_browser():
    """Start a fresh Chrome WebDriver session and return it.

    The browser runs headless, in incognito mode and without the sandbox,
    and its window is resized to 950x20000 pixels.

    Returns:
        The newly created ``webdriver.Chrome`` instance. The caller is
        responsible for calling ``quit()`` on it.
    """
    chrome_opts = webdriver.ChromeOptions()
    # A randomized user-agent (rua() from the setting module) is currently
    # disabled, so Chrome's default user-agent is sent.
    for flag in ('--headless', '--incognito', '--no-sandbox'):
        chrome_opts.add_argument(flag)

    browser = webdriver.Chrome(options=chrome_opts)
    # Presumably the very tall window lets many results render without
    # scrolling -- TODO confirm.
    browser.set_window_size(950, 20000)
    return browser
|
|
|
+
|
|
|
# Column-oriented accumulator: every column name maps to its own empty list.
# Columns, in order: search term, domain, result title, result URL,
# result rank.
resultdict = {
    column: []
    for column in ('搜尋詞', '網域', '結果標題', '結果網址', '結果名次')
}
|
|
|
+
|
|
|
def process_one(term, target, n, sr):
    """Search YouTube for ``term`` and optionally locate and click a target.

    A new browser is started for every call. The search result page is
    loaded, every "more" section is expanded, and each video result is
    scanned for its title, link and channel name.

    Args:
        term: Search phrase. It is URL-encoded before being placed in the
            query string.
        target: Substring looked for in each result's link. Only consulted
            when ``n`` is 1 or 2.
        n: Mode selector. With 1 or 2 the first result whose link contains
            ``target`` is reported on stdout. With 2 that result is also
            clicked, a row is inserted into ``table`` and the function
            waits 30 seconds before closing the browser.
        sr: Not used by this function; kept for signature compatibility.

    Returns:
        None. Errors are printed with ``traceback.print_exc()`` instead of
        being raised, and the browser is always closed.
    """
    # Imported here so the function does not depend on a module-level import.
    from urllib.parse import quote_plus

    try:
        print(term)

        driver = restart_browser()
        try:
            # Encode the term so characters such as '&', '#', '+' or spaces
            # stay inside the search_query value instead of breaking the URL.
            yturl = ('https://www.youtube.com/results?search_query='
                     + quote_plus(term))
            driver.get(yturl)
            time.sleep(6)  # fixed wait for the result page to render

            # Expand every "more" section; failures are expected and ignored.
            for m in driver.find_elements(By.XPATH, "//div[@id='more']"):
                try:
                    morebtn = m.find_element(By.TAG_NAME, "yt-formatted-string")
                    webdriver.ActionChains(driver).move_to_element(morebtn).perform()
                    webdriver.ActionChains(driver).move_to_element(morebtn).click().perform()
                except Exception:
                    print("nope")

            time.sleep(5)

            elmts = driver.find_elements(
                By.XPATH,
                "//div[@class='text-wrapper style-scope ytd-video-renderer']",
            )
            count = len(elmts)
            print(count)
            time.sleep(3)

            cnt = 1  # 1-based rank of the result currently being scanned
            datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [],
                        '作者': [], '結果說明': [], '結果名次': []}

            found = False
            clickelmt = None
            resultttl = "X"
            resultlink = "X"
            resultrank = "X"

            for elmt in elmts:
                try:
                    dt = elmt.find_element(By.TAG_NAME, "h3")
                    title = dt.text
                    href = dt.find_element(By.TAG_NAME, "a").get_attribute('href')
                    author = elmt.find_element(By.ID, "channel-info").find_element(By.ID, "text-container")
                    datadict['搜尋詞'].append(term)
                    datadict['結果標題'].append(title)
                    datadict['結果網址'].append(href)
                    datadict['作者'].append(author.text)
                    datadict['結果名次'].append(str(cnt))
                    # Only the first matching result is remembered.
                    if n in (1, 2) and target in href and not found:
                        found = True
                        clickelmt = elmt
                        resultrank = str(cnt)
                        resultlink = href
                        # NOTE(review): this stores the whole result element
                        # text, not just `title` -- confirm that is intended.
                        resultttl = elmt.text
                        print("Target domain found")
                        print(resultttl)
                        print(resultlink)
                        print(resultrank)
                    # The rank advances only for results parsed successfully.
                    cnt += 1
                except Exception:
                    print('href2 exception')
                    traceback.print_exc()

            if not datadict['結果標題']:
                print('None', term,)

            if n == 2 and found:
                webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
                webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()
                print("Clicked")
                time_stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                table.insert({'ranking': resultrank, 'kw': term,
                              'results': count, 'url': resultlink,
                              'title': resultttl, 'dt': time_stamp})
                # Stay on the clicked page for a while before closing.
                time.sleep(30)
        finally:
            # Always release the browser, even when scraping fails, so no
            # headless Chrome processes are left behind.
            driver.quit()

        print('completed')
    except Exception:
        traceback.print_exc()
|
|
|
+
|
|
|
+ #db_local.close()
|
|
|
+
|
|
|
def execute(termlist, target='', n=0, sr=None):
    """Run ``process_one`` for every search term in ``termlist``.

    ``process_one`` requires four positional arguments, so the previous
    one-argument call raised ``TypeError`` on the first term. The extra
    arguments are accepted here with defaults and forwarded unchanged.

    Args:
        termlist: Iterable of search phrases.
        target: Link substring forwarded to ``process_one``.
        n: Mode forwarded to ``process_one``. With the default 0 no result
            is treated as the target, so nothing is clicked or logged.
        sr: Forwarded to ``process_one`` as-is.
    """
    for term in termlist:
        process_one(term, target, n, sr)


# Command-line flags are parsed when the module is loaded. ``--loop`` is only
# referenced by the commented-out scheduler code that follows.
parser = argparse.ArgumentParser()
parser.add_argument('--loop', action="store_true")
args = parser.parse_args()
|
|
|
+
|
|
|
+# if args.loop:
|
|
|
+# schedule.every(0.4).minutes.do(process_one)
|
|
|
+# # print('今天開始')
|
|
|
+# # schedule.every().day.at('9:30').do(process_one)
|
|
|
+#
|
|
|
+# while True:
|
|
|
+# schedule.run_pending()
|
|
|
+# time.sleep(1)
|
|
|
+
|