"""Poll Google for a fixed set of keywords and record where a domain ranks.

Each round picks one keyword/domain pair at random, loads the Google result
page in a fresh Chrome instance, logs every result row to the database and
either clicks the matching result or records the keyword as not indexed.
"""
import ast
import codecs
import datetime
import json
import os
import random
import sys
import time
import traceback
import urllib.parse

import dataset
import requests
import undetected_chromedriver as uc
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Browser instance shared across rounds; managed by re_get_webdriver().
driver = None

# NOTE(review): the database password and the notify token below are committed
# in plain text. Consider loading them from the environment and rotating them.
db = dataset.connect('postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres')

headers = {
    "Authorization": "Bearer " + "OZDcq7sVKwr3F6YNLtBF3LuIgpa4Ql9eAnBWeD7sHTJ",
    "Content-Type": "application/x-www-form-urlencoded",
}

# Keyword -> domain expected to appear in the results for that keyword.
KW_DOMAINS = {
    '邱德馨 linkedin': 'linkedin.com',
    '邱德馨 facebook': "facebook.com",
    '邱德馨 樹脂市場': "vocus.cc",
    '邱德馨 減碳': "ctwant.com",
    '邱德馨 關懷員工': 'news.aimedium.org',
    '邱德馨 國喬': 'vocus.cc',
}


def send_msg(kw):
    """Post *kw* as the ``message`` of a LINE Notify request.

    The HTTP response is not inspected. A timeout is set so that a stalled
    request cannot block the polling loop forever; on timeout ``requests``
    raises, and the exception propagates to the caller.
    """
    params = {"message": kw}
    print('通知結果', params)
    requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params=params,
        timeout=30,
    )


def re_get_webdriver():
    """Close any existing browser and start a new undetected Chrome.

    Returns:
        The new driver (also stored in the module-level ``driver``), or
        ``None`` when Chrome could not be started.
    """
    global driver
    if driver is not None:
        print('closing....')
        try:
            # The previous round may already have quit this instance, so a
            # failure here must not prevent starting a new browser.
            driver.quit()
        except Exception:
            traceback.print_exc()
        print('quit....')
        driver = None
    try:
        options = uc.ChromeOptions()
        # Headless mode is not enabled; instead the window is shrunk and
        # moved off-screen.
        options.add_argument("--window-size=200,100")
        options.add_argument("--window-position=-32000,-32000")
        driver = uc.Chrome(options=options)
    except Exception:
        # Exception (not a bare except) so Ctrl-C can still interrupt the
        # caller's retry loop.
        traceback.print_exc()
        driver = None
        return None
    return driver


def run_once(jsobj):
    """Run one ranking check for a single keyword/domain pair.

    Args:
        jsobj: dict with keys ``'kw'`` (search keyword), ``'domain'``
            (substring looked for in each result URL) and ``'client'``.
            An optional ``'delete_kw_count'`` value is appended to the
            notification sent when the domain is not found.

    Side effects:
        Every result row is inserted into ``seo_search_history``. When the
        domain is found, a row is inserted into ``nda_log`` and the matching
        link is clicked. When it is not found, a ``-1`` ranking is logged,
        the keyword is deleted from ``seo``, recorded in ``delete_kw`` and a
        notification is sent.

    Raises:
        SystemExit: when the result page contains no result links (the URL
            and title are printed first). The browser is still closed.
    """
    history = db['seo_search_history']
    nda_log = db['nda_log']
    delete_kw = db['delete_kw']
    seo = db['seo']
    print(jsobj)

    # Keep retrying until a browser can be started.
    while True:
        browser = re_get_webdriver()
        print('re_get_webdriver')
        if browser is not None:
            break
        time.sleep(3)

    try:
        kw = jsobj['kw']
        domain = jsobj['domain']
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(
            urllib.parse.quote(kw), 20, 'zh-TW')
        browser.get(googleurl)
        time.sleep(6)
        print(browser.current_url)

        # Previously tried alternative selector, kept for reference:
        # "//div[@class='xe8e1b']//a"
        elmts = browser.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            print(browser.current_url)
            print(browser.title)
            sys.exit()

        clickelmt = None
        clickidx = 0
        clickhref = ''
        clicktitle = ''
        href = None
        for idx, elmt in enumerate(elmts, start=1):
            href = elmt.get_attribute('href')
            txt = elmt.text
            history.insert({'ranking': idx, 'kw': kw, 'results': numresults,
                            'url': href, 'title': txt,
                            'dt': datetime.datetime.now()})
            # href can be None for anchors without the attribute.
            if href and domain in href:
                print('found....')
                print(href)
                print(txt)
                print("ranking", idx)
                # NOTE(review): when several results match, the last match
                # wins (as in the original); confirm whether the best rank
                # should be recorded instead.
                clickelmt = elmt
                clickidx = idx
                clickhref = href
                clicktitle = txt

        # Decide from whether any row matched, not from the last row seen:
        # a match in the final position must not be treated as "not found".
        if clickelmt is None:
            print('未收錄')
            nda_log.insert({'ranking': -1, 'kw': kw, 'results': numresults,
                            'url': href, 'title': '未收錄',
                            'dt': datetime.datetime.now(),
                            'client': jsobj['client']})
            seo.delete(kw=kw, domain=domain)
            delete_kw.insert({'kw': kw, 'domain': domain,
                              'cust': jsobj['client'],
                              'dt': datetime.datetime.now()})
            message = '未收錄:' + kw + ' ' + domain
            # The count is optional; callers in this file do not supply it.
            if 'delete_kw_count' in jsobj:
                message = message + "\n" + str(jsobj['delete_kw_count'])
            send_msg(message)
        else:
            nda_log.insert({'ranking': clickidx, 'kw': kw,
                            'results': numresults, 'url': clickhref,
                            'title': clicktitle,
                            'dt': datetime.datetime.now(),
                            'client': jsobj['client'], 'type': ''})
            webdriver.ActionChains(browser).move_to_element(clickelmt).perform()
            webdriver.ActionChains(browser).move_to_element(clickelmt).click().perform()
            print('clicked....')
            time.sleep(15)
    except Exception:
        # Best-effort round: log and move on. SystemExit and
        # KeyboardInterrupt are deliberately not caught here.
        traceback.print_exc()
        print('exception')
    finally:
        try:
            browser.quit()
        except Exception:
            traceback.print_exc()


def main():
    """Check one randomly chosen keyword every 90 seconds, forever."""
    while True:
        kw, domain = random.choice(list(KW_DOMAINS.items()))
        print(kw, domain)
        run_once({'domain': domain, 'kw': kw, 'client': 'HHH'})
        print('等待下次執行')
        time.sleep(90)


if __name__ == "__main__":
    main()