"""Collect the video ids of a YouTube channel into the ``yt_list`` table.

Running this module connects to the database, starts a Chrome browser, opens
the channel's video grid, scrolls it to load more entries and stores one row
per video link that is found.  It also defines Google-search helpers
(``empty_query``, ``process_query``) and a LINE Notify helper (``send_msg``)
that work on the shared module-level ``driver``.
"""
import codecs
import datetime
import os
import random
import sys
import time
import urllib.parse

import dataset
import docker
import googlesearch
import requests
import rpyc
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# SECURITY(review): this bearer token is committed in plain text.  It should
# be rotated and loaded from the environment or a secret store instead.
headers = {
    "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
    "Content-Type": "application/x-www-form-urlencoded"
}

# Upper bound, in seconds, for the LINE Notify HTTP call so that a stalled
# connection cannot block the scraper forever.
NOTIFY_TIMEOUT_SECONDS = 10

# XPath of the anchors that carry the target URL of each Google result.
RESULT_LINK_XPATH = "//div[@class='yuRUbf']/a"

# Prefix stripped from a video link so that only the video id is stored.
YOUTUBE_WATCH_PREFIX = 'https://www.youtube.com/watch?v='


def send_msg(kw):
    """Send a LINE Notify message saying that keyword *kw* is being processed.

    Args:
        kw: Keyword text appended to the notification message.

    Raises:
        requests.RequestException: If the request fails or does not complete
            within ``NOTIFY_TIMEOUT_SECONDS``.
    """
    params = {"message": "處理關鍵字: " + kw}
    requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params=params,
        timeout=NOTIFY_TIMEOUT_SECONDS,
    )


def empty_query(q):
    """Open the Google results page for *q* in the shared browser.

    The results are not inspected; the function only loads the page and then
    sleeps for three seconds.

    Args:
        q: Search text; it is URL-quoted before being placed in the query.
    """
    googleurl = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
    driver.get(googleurl)
    time.sleep(3)


def scrolling(driver, pgnum):
    """Press PAGE_DOWN on the page body *pgnum* times.

    Args:
        driver: Selenium WebDriver whose current page is scrolled.
        pgnum: Number of PAGE_DOWN key presses to send.  When it is greater
            than one, each press is followed by a 0.3 s pause.
    """
    ub = driver.find_element_by_css_selector('body')
    for _ in range(pgnum):
        ub.send_keys(Keys.PAGE_DOWN)
        if pgnum > 1:
            time.sleep(0.3)


def _scan_result_links(pat, start_idx, error_message):
    """Look for *pat* among the result links of the page currently shown.

    Args:
        pat: Substring searched for in each link's ``href``.
        start_idx: Index given to the first link on this page.
        error_message: Text printed when a link cannot be processed.

    Returns:
        A ``(found, next_idx)`` tuple.  ``found`` is the index of the first
        matching link, or ``None`` when nothing matched.  ``next_idx`` is the
        index the following link would receive.  Links that raise while being
        processed are reported and do not consume an index.
    """
    idx = start_idx
    for elmt in driver.find_elements_by_xpath(RESULT_LINK_XPATH):
        try:
            href = elmt.get_attribute('href')
            print(str(idx) + ': ' + href)
            if pat in href:
                return idx, idx
            idx += 1
        except Exception:
            print(error_message)
    return None, idx


def process_query(qs, number_results=10, language_code='en', pat='hhh.com.tw'):
    """Return the position of the first Google result whose URL contains *pat*.

    The first results page is scanned; if nothing matches, the "next page"
    link is clicked and the second page is scanned as well, with the index
    continuing from where the first page stopped.

    Args:
        qs: Search text; it is URL-quoted before being placed in the query.
        number_results: Requested result count; ``num`` is sent as this + 1.
        language_code: Value of the ``hl`` query parameter.
        pat: Substring that identifies the wanted result URL.

    Returns:
        The zero-based index of the first matching result, or ``None`` when
        no result matches or the next-page link cannot be clicked.
    """
    escaped_search_term = urllib.parse.quote(qs)
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
        escaped_search_term, number_results + 1, language_code)
    driver.get(googleurl)

    found, idx = _scan_result_links(pat, 0, 'href exception')
    if found is not None:
        return found

    try:
        elmt = driver.find_element_by_xpath("//a[@id='pnnext']")
        # Hover first, then hover-and-click, exactly as two separate actions.
        webdriver.ActionChains(driver).move_to_element(elmt).perform()
        webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    except Exception:
        print('pnnext exception')
        return None

    # Fixed pause to let the second results page load before scanning it.
    time.sleep(4)
    found, _ = _scan_result_links(pat, idx, 'href2 exception')
    return found


def restart_browser():
    """Start a new Chrome session sized 1400x1000 and return its driver."""
    options = webdriver.ChromeOptions()
    # Optional proxy, currently disabled:
    # options.add_argument("--proxy-server=socks5://130.61.93.198:1080")
    browser = webdriver.Chrome(desired_capabilities=options.to_capabilities())
    browser.set_window_size(1400, 1000)
    return browser


result = []
driver = None

# SECURITY(review): database credentials are committed in plain text.  They
# should be rotated and loaded from the environment or a secret store.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
table = db['yt_list']

# NOTE(review): the browser started here is never quit by this script.
driver = restart_browser()
driver.get('https://www.youtube.com/c/%E5%B9%B8%E7%A6%8F%E7%A9%BA%E9%96%93gorgeousspace/videos?view=0&sort=dd&flow=grid')
time.sleep(1)
# Scroll so that the grid loads more videos than the initial page shows.
scrolling(driver, 60)

try:
    video_links = driver.find_elements_by_xpath("//a[@id='video-title']")
except Exception as exc:
    print('video list exception: ' + repr(exc))
    video_links = []

for link in video_links:
    # Handle each link on its own so one bad entry does not discard the rest.
    try:
        href = link.get_attribute('href')
        if not href:
            print('video link without href skipped')
            continue
        video_id = href.replace(YOUTUBE_WATCH_PREFIX, '')
        table.insert({'folder': 'upload', 'video': video_id})
    except Exception as exc:
        print('video insert exception: ' + repr(exc))