"""Probe the Google ranking of a domain for a keyword using headless Chrome.

On execution the script picks two random words from a fixed vocabulary,
searches Google for ``"<word1> <word2> choozmo"``, looks for the first result
whose URL contains ``choozmo.com``, stores the outcome in the
``rank_detection`` table, clicks the hit if there is one, and exits.
"""
import codecs
import json
import os
import random
import sys
import time
import traceback

import dataset
import redis
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from userAgentRandomizer import userAgents

# The single shared browser session; (re)created by re_get_webdriver().
driver = None

# NOTE(review): credentials are hard-coded in source; consider moving them to
# configuration / environment variables.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')

# XPath locators shared by the navigation routes below.
_ALL_TAB_XPATH = "//div[@class='hdtb-mitem']/a[contains(text(),'全部') or contains(text(),'All')]"
_GOOGLE_LINK_XPATH = "//a[contains(@href,'https://www.google.com/')]"
_GOOGLE_BOX_BY_CLASS_XPATH = "//input[@class='gLFyf gsfi']"
_GOOGLE_BOX_BY_NAME_XPATH = "//input[@name='q']"
_RESULT_LINK_XPATH = "//div[@class='yuRUbf']/a"


def re_get_webdriver():
    """(Re)create the global headless Chrome ``driver``.

    An existing session is quit first (and stray ``chrome`` processes are
    killed). On success the global ``driver`` holds a new session sized
    1400x1000. If preparing options or resizing fails, the traceback is
    printed and ``driver`` is left as ``None``.

    Returns:
        None. The result is communicated through the global ``driver``.

    Raises:
        SystemExit: with status 1 when Chrome itself cannot be launched.
    """
    global driver
    if driver is not None:
        print('closing....')
        driver.quit()
        os.system('killall chrome')
        print('quit....')
        driver = None
    try:
        ua = userAgents()
        user_agent = ua.random()
        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--headless")
        # The random user agent is only printed; it is not applied to Chrome.
        print(user_agent)
        options.add_argument("--incognito")
        driver = None
        try:
            driver = webdriver.Chrome(options=options)
        except Exception:
            # Launch failure is fatal. `except Exception` below lets this
            # SystemExit propagate, so no self-`kill` is needed to get out.
            traceback.print_exc()
            sys.exit(1)
        driver.set_window_size(1400, 1000)
        return None
    except Exception:
        traceback.print_exc()
        driver = None
        return None


def _hover_and_click(elmt):
    """Move the pointer onto ``elmt``, then move again and click it."""
    webdriver.ActionChains(driver).move_to_element(elmt).perform()
    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()


def _type_and_submit(xpath, text, pause=1):
    """Find the input at ``xpath``, wait ``pause`` seconds, type ``text`` and press ENTER."""
    box = driver.find_element(By.XPATH, xpath)
    time.sleep(pause)
    box.send_keys(text)
    box.send_keys(Keys.ENTER)


def _via_google_vertical(start_url, box_xpath, kw):
    """Search ``kw`` on a Google vertical, then click its 'All' results tab."""
    driver.get(start_url)
    time.sleep(5)
    _type_and_submit(box_xpath, kw, pause=0)
    time.sleep(7)
    _hover_and_click(driver.find_element(By.XPATH, _ALL_TAB_XPATH))
    time.sleep(5)


def _via_google_link(start_url, kw, opens_new_tab=False):
    """Open ``start_url``, follow its link to google.com and search ``kw`` there.

    With ``opens_new_tab`` the link is expected to open a second window, which
    is switched to before the Google search box is located by name instead of
    by class.
    """
    driver.get(start_url)
    time.sleep(4)
    _hover_and_click(driver.find_element(By.XPATH, _GOOGLE_LINK_XPATH))
    if opens_new_tab:
        time.sleep(2)
        driver.switch_to.window(driver.window_handles[1])
        time.sleep(3)
        print(driver.current_url)
        box_xpath = _GOOGLE_BOX_BY_NAME_XPATH
    else:
        time.sleep(5)
        box_xpath = _GOOGLE_BOX_BY_CLASS_XPATH
    _type_and_submit(box_xpath, kw)
    time.sleep(6)


def from_shopping(kw):
    """Reach Google results for ``kw`` via Google Shopping."""
    # Previously typed a hard-coded literal and ignored ``kw``.
    _via_google_vertical('https://shopping.google.com', "//input[@id='REsRA']", kw)


def from_book(kw):
    """Reach Google results for ``kw`` via Google Books."""
    # Previously typed a hard-coded literal and ignored ``kw``.
    _via_google_vertical('https://books.google.com/', "//input[@id='oc-search-input']", kw)


def from_wiki(kw):
    """Reach Google via the Wikipedia 'Google Search' article and search ``kw``."""
    _via_google_link('https://en.wikipedia.org/wiki/Google_Search', kw)


def from_bing(kw):
    """Reach Google via a Bing search for 'google' and search ``kw``."""
    _via_google_link('https://www.bing.com/search?q=google', kw)


def from_ecosia(kw):
    """Reach Google via an Ecosia search for 'GOOGLE' and search ``kw``."""
    _via_google_link('https://www.ecosia.org/search?method=index&q=GOOGLE', kw)


def from_brave(kw):
    """Reach Google via a Brave search for 'google' and search ``kw``."""
    _via_google_link('https://search.brave.com/search?q=google&source=web', kw)


def from_duckduckgo(kw):
    """Reach Google via a DuckDuckGo search for 'google' and search ``kw``."""
    _via_google_link('https://duckduckgo.com/?q=google', kw)


def from_ekoru(kw):
    """Reach Google via an Ekoru search for 'google' and search ``kw``."""
    _via_google_link('https://www.ekoru.org/?q=google', kw)


def from_yahoo(kw):
    """Reach Google via a Yahoo search for 'google' (new window) and search ``kw``."""
    _via_google_link('https://search.yahoo.com/search?p=google', kw, opens_new_tab=True)


def from_gibiru(kw):
    """Reach Google via a Gibiru search for 'google' (new window) and search ``kw``."""
    _via_google_link('https://gibiru.com/results.html?q=google', kw, opens_new_tab=True)


# Warm-up routes selectable by index in run_once().
_ENTRY_ROUTES = {
    0: from_book,
    1: from_shopping,
    2: from_wiki,
    3: from_bing,
    4: from_duckduckgo,
    5: from_yahoo,
    6: from_gibiru,
    7: from_ekoru,
    8: from_ecosia,
    9: from_brave,
}


def run_once(jsobj):
    """Search Google for a keyword, record the domain's rank, then exit.

    The first result whose title is longer than 10 characters and whose URL
    contains the domain is inserted into ``rank_detection`` with its 1-based
    position and is clicked. If there is no such result, a row with ranking
    ``-1`` is inserted instead. Any error during the run is printed, not
    raised.

    Args:
        jsobj: mapping with keys ``'kw'`` (search phrase) and ``'domain'``
            (substring to look for in result URLs).

    Returns:
        None, and only when no browser could be started.

    Raises:
        SystemExit: always, once a browser session existed; the session is
            quit first.
    """
    global driver
    table = db['rank_detection']
    print(jsobj)
    # 100 matches no entry in _ENTRY_ROUTES, so no warm-up route is taken.
    # Use random.randint(0, 9) to pick one at random.
    route_index = 100
    if driver is None:
        time.sleep(8)
        re_get_webdriver()
    if driver is None:
        return
    try:
        kw = jsobj['kw']
        domain = jsobj['domain']
        warm_up = _ENTRY_ROUTES.get(route_index)
        if warm_up is not None:
            warm_up(kw)
        driver.get('https://www.google.com?num=100')
        time.sleep(3)
        print(driver.current_url)
        _type_and_submit(_GOOGLE_BOX_BY_NAME_XPATH, kw)
        time.sleep(6)
        elmts = driver.find_elements(By.XPATH, _RESULT_LINK_XPATH)
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        found = False
        for idx, elmt in enumerate(elmts, start=1):
            href = elmt.get_attribute('href')
            txt = elmt.text
            # href may be None when the anchor has no such attribute.
            if len(txt) > 10 and href and domain in href:
                print('found....')
                print('clicked....')
                print(href)
                print(txt)
                print("ranking", idx)
                table.insert({'ranking': idx, 'kw': kw, 'results': numresults,
                              'url': href, 'title': txt})
                found = True
                _hover_and_click(elmt)
                time.sleep(5)
                break
        if not found:
            table.insert({'ranking': -1, 'kw': kw, 'results': numresults,
                          'url': '', 'title': '未收錄'})
    except Exception:
        print('exception')
        traceback.print_exc()
    finally:
        # Always release the browser, including on Ctrl-C.
        driver.quit()
    sys.exit()


# NOTE(review): this client is created but not used anywhere in this file, and
# its password is hard-coded; confirm whether it is still needed.
r = redis.Redis(host='db.ptt.cx', port=6379, db=2, password='choozmo9')

# Vocabulary from which two words are drawn to build the search phrase.
js = ['seo', '台北', '新北', '新竹', '竹北', '台灣', '最強', '集仕多', '新聞', '是什麼',
      '搜尋', '優化', '如何', '元宇宙', '加速', '排名', '查詢', '關鍵字', '計劃', '曝光',
      '推薦', '工具', 'google', '排行', '排序', '公司', '提升', '收費', '行情', '網站',
      '網頁', 'youtube', '計畫', 'AI', '人工智慧', 'deep learning', '深度學習', '評分',
      '研究', '價格', '工具', '論壇', '自然', '規則', '流量', '建議', '寫作', '技巧',
      '課程', '測試', '因素', '改善', '購買', '谷歌', '成本', '推廣', '人員', '方式',
      '行銷', '外貿', '企業', '電商', '電子商務', '商務', '改版', '分析', '老師', '講師',
      '顧問', '提高', '影片', '主播']

elmt1 = random.choice(js)
elmt2 = random.choice(js)
run_once({'domain': 'choozmo.com', 'kw': elmt1 + " " + elmt2 + " choozmo"})