"""Search Google News for a keyword and click the result whose title matches.

One entry is picked at random from ``entries``; the script opens the Google
News search page for its keyword in headless Chrome, looks for a result whose
title contains one of the entry's ``domain`` fragments, clicks it, and records
the ranking in the ``gnews_clicks`` table.
"""
import codecs
import json
import os
import random
import sys
import time
import traceback
from urllib.parse import quote

import dataset
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# NOTE: user-agent randomisation (userAgentRandomizer) is currently disabled.

# Shared browser session; created lazily by re_get_webdriver().
driver = None

# NOTE(review): credentials are embedded in the default URL. The environment
# variable lets deployments override them without editing the source; the
# default keeps the previous behaviour.
db = dataset.connect(
    os.environ.get(
        'GNEWS_DB_URL',
        'postgresql://postgres:eyJhbGciOiJI@172.105.241.163:5432/postgres',
    )
)
table = db['gnews_clicks']

# Command-line switches passed to every Chrome instance.
_CHROME_ARGUMENTS = (
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--headless",
    "--force-dark-mode",
    '--start-maximized',
    "--incognito",
)

# Results whose visible title is this short or shorter are ignored.
_MIN_TITLE_LENGTH = 10


def scrolling(driver, pgnum):
    """Send PAGE_DOWN to the page body ``pgnum`` times.

    When more than one page is requested, pause 0.3 s after each key press.
    """
    body = driver.find_element(By.CSS_SELECTOR, 'body')
    for _ in range(pgnum):
        body.send_keys(Keys.PAGE_DOWN)
        if pgnum > 1:
            time.sleep(0.3)


def re_get_webdriver():
    """(Re)create the global headless Chrome ``driver``.

    Any existing session is quit and stray ``chrome`` processes are killed
    first. If Chrome cannot be started the whole process exits with status 1.
    If the window cannot be resized, the new browser is closed and ``driver``
    is left as ``None``.

    Returns:
        None. The result is published through the module-level ``driver``.
    """
    global driver

    if driver is not None:
        print('closing....')
        try:
            driver.quit()
        except Exception:
            # A dead session must not prevent the process cleanup below.
            traceback.print_exc()
        os.system('killall chrome')
        print('quit....')
        driver = None

    options = webdriver.ChromeOptions()
    for argument in _CHROME_ARGUMENTS:
        options.add_argument(argument)

    try:
        browser = webdriver.Chrome(options=options)
    except Exception:
        # Without a browser nothing useful can be done. SystemExit is not
        # caught by the `except Exception` handlers in this file, so it
        # really terminates the process (no need to `kill` our own pid).
        traceback.print_exc()
        sys.exit(1)

    try:
        browser.set_window_size(1920, 19200)
    except Exception:
        traceback.print_exc()
        try:
            browser.quit()  # do not leak the half-configured browser
        except Exception:
            traceback.print_exc()
        driver = None
        return None

    driver = browser
    return None


def _click_element(elmt):
    """Move the pointer to ``elmt``, click it, and wait 6 s for the page."""
    webdriver.ActionChains(driver).move_to_element(elmt).perform()
    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    time.sleep(6)


def run_once(jsobj):
    """Search Google News for ``jsobj['kw']`` and click the matching result.

    Args:
        jsobj: Mapping with key ``'kw'`` (the search keyword) and optionally
            ``'domain'``: a list of text fragments (a single string is also
            accepted). The first result whose title is longer than 10
            characters and contains any fragment is clicked.

    Side effects:
        Inserts one row into ``table``: the 1-based ranking of the clicked
        result, or ranking ``-1`` when nothing matched. Exceptions raised
        while scraping are printed and swallowed so that the caller keeps
        running.
    """
    print(jsobj)
    global driver

    if driver is None:
        time.sleep(8)
        re_get_webdriver()
    if driver is None:
        return

    try:
        kw = jsobj['kw']
        domain = jsobj.get('domain')
        if isinstance(domain, str):
            # A bare string would otherwise be matched character by character.
            domain = [domain]
        # Exclusion mode is switched off in this script; the branch below is
        # kept so that it can be re-enabled by supplying a value here.
        exclude = None

        # Percent-encode the keyword so that '&', '#', '+' etc. cannot
        # corrupt the query string.
        driver.get('https://news.google.com/search?q=' + quote(kw)
                   + '&hl=zh-TW&gl=TW&ceid=TW%3Azh-Hant')
        time.sleep(2)
        print(driver.current_url)
        time.sleep(2)

        elmts = driver.find_elements(
            By.XPATH, "//a[@class='JtKRv' and @jsaction='click:kkIcoc;']")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)

        candidates = []  # (element, title) pairs collected when no domain is given
        for idx, elmt in enumerate(elmts, start=1):
            href = elmt.get_attribute('href')
            txt = elmt.text
            if len(txt) <= _MIN_TITLE_LENGTH:
                continue
            if domain is None:
                candidates.append((elmt, txt))
                continue
            if any(fragment in txt for fragment in domain):
                print('found....')
                print('clicked....')
                print(href)
                print(txt)
                print("ranking", idx)
                _click_element(elmt)
                table.insert({'ranking': idx, 'kw': kw, 'results': numresults,
                              'url': href, 'title': txt})
                return

        if exclude is not None:
            print('exclude')
            pool = candidates[5:]  # skip the top five results
            if pool:
                elmt, txt = random.choice(pool)
                print(elmt)
                print(txt)
                _click_element(elmt)

        # Reaching this point means no matching result was clicked.
        table.insert({'ranking': -1, 'kw': kw, 'results': numresults,
                      'url': '', 'title': '未收錄'})
    except Exception:
        print('exception')
        traceback.print_exc()


# Keyword / title-fragment pairs; one of them is processed per run.
entries = [
    {'kw': '啟翔輕金屬 樂鋁屋', 'domain': ['永續實踐再創新'], 'ranking': '-1', 'fname': 'fname'},
    {'kw': '啟翔輕金屬 防疫門', 'domain': ['政治大學'], 'ranking': '-1', 'fname': 'fname'},
    {'kw': '啟翔輕金屬 鋁鋁創新', 'domain': ['把痛點變新商機,'], 'ranking': '-1', 'fname': 'fname'},
    {'kw': '啟翔輕金屬 台灣國際室內設計', 'domain': ['台灣國際室內設計'], 'ranking': '-1', 'fname': 'fname'},
    {'kw': '啟翔輕金屬 人工智慧', 'domain': ['人工智慧'], 'ranking': '-1', 'fname': 'fname'},
    {'kw': '啟翔輕金屬 緬甸', 'domain': ['緬甸'], 'ranking': '-1', 'fname': 'fname'},
    {'kw': '啟翔輕金屬 鋁業', 'domain': ['臺灣鋁業'], 'ranking': '-1', 'fname': 'fname'},
]

entry = random.choice(entries)
try:
    run_once(entry)
finally:
    # Always close the browser so headless Chrome processes do not pile up.
    if driver is not None:
        try:
            driver.quit()
        except Exception:
            traceback.print_exc()
        driver = None