"""Scrape one Google results page and record the ranking of every hit.

Running this file opens the configured search URL in Chrome, collects the
result links, inserts one row per hit into the ``google_rank`` table and
writes the same data to a CSV file.  The process exits when the run ends.
"""
import codecs
import datetime
import json
import os
import random
import sys
import time
import traceback

import dataset
import pandas as pd
import pymysql
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

pymysql.install_as_MySQLdb()

# Shared Selenium session; created lazily by re_get_webdriver().
driver = None

# NOTE(review): database credentials and the notification token below are
# hard-coded in source; consider loading them from the environment instead.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
headers = {
    "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
    "Content-Type": "application/x-www-form-urlencoded"
}

# Seconds to wait for the notification endpoint before giving up, so a stalled
# connection cannot hang the scraper forever.
_NOTIFY_TIMEOUT_SECONDS = 10

# Latitude / longitude strings per supported location key
# (Chicago, Miami, Washington, New York).
_CITY_COORDINATES = {
    'chicago': ['42.04866173771369', '-87.68260072643513'],
    'miami': ['25.764458843530548', '-80.19787522585152'],
    'wc': ['38.96071674051165', '-77.03155367248387'],
    'ny': ['40.76774141099703', '-73.98439238945637'],
}


def send_msg(kw):
    """POST *kw* as the ``message`` parameter to the LINE Notify endpoint.

    The response is not inspected.  A ``requests`` exception (including a
    timeout after ``_NOTIFY_TIMEOUT_SECONDS``) propagates to the caller.
    """
    params = {"message": kw}
    requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params=params,
        timeout=_NOTIFY_TIMEOUT_SECONDS,
    )


def re_get_webdriver():
    """(Re)create the module-level Chrome ``driver``.

    An existing driver is quit first and ``killall chrome`` is run.  On
    success the global ``driver`` is a fresh incognito Chrome session with
    cookies cleared and a 950x20000 window.  On any start-up failure the
    traceback is printed and ``driver`` is left as ``None``.

    Returns:
        None in every case; callers check the global ``driver`` instead.
    """
    global driver
    if driver is not None:
        print('closing....')
        driver.quit()
        os.system('killall chrome')
        print('quit....')
        driver = None

    new_driver = None
    try:
        s = Service('/Users/mac/Downloads/121/chromedriver')
        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        # options.add_argument("--headless")
        options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})
        options.add_argument("--incognito")
        if os.name == 'nt':
            # On Windows the chromedriver path above is not used.
            new_driver = webdriver.Chrome(options=options)
        else:
            new_driver = webdriver.Chrome(service=s, options=options)
        new_driver.delete_all_cookies()
        new_driver.set_window_size(950, 20000)
    except Exception:
        traceback.print_exc()
        if new_driver is not None:
            # Chrome started but configuration failed: do not leak the browser.
            try:
                new_driver.quit()
            except Exception:
                traceback.print_exc()
        driver = None
        return None

    driver = new_driver
    return None


def scrolling(driver, pgnum):
    """Send PAGE_DOWN to the page ``body`` *pgnum* times.

    When *pgnum* is greater than 1 the function sleeps 0.3 s after each press.
    """
    body = driver.find_element(By.CSS_SELECTOR, 'body')
    for _ in range(pgnum):
        body.send_keys(Keys.PAGE_DOWN)
        if pgnum > 1:
            time.sleep(0.3)


def _result_description(elmt):
    """Return the snippet text located near a result link, or None if absent."""
    try:
        snippet = elmt.find_element(By.XPATH, "./../../..//div[@data-content-feature=1]")
        return snippet.text
    except Exception:
        return None


def run_once(jsobj):
    """Load one search URL, store every result's rank, then exit the process.

    Args:
        jsobj: dict with the keys ``kw`` (search keyword), ``fname`` (location
            key, must be one of ``_CITY_COORDINATES``), ``date`` (string used
            in the CSV file name) and ``url`` (page to open).

    Behaviour:
        * Returns None without doing anything else if no driver can be created.
        * If the page yields no result links, a notification is sent and the
          process exits.
        * Otherwise one row per result is inserted into ``google_rank`` and a
          CSV is written to ``/Users/mac/Documents/wk/<date><fname>.csv``.
        * Any other exception is printed, not raised.
        * Once the scrape has been attempted the driver is always quit and the
          process exits via ``sys.exit()``.
    """
    global driver
    table = db['google_rank']
    date = jsobj['date']
    print(jsobj)

    if driver is None:
        time.sleep(8)
        re_get_webdriver()
    if driver is None:
        return

    try:
        kw = jsobj['kw']
        fname = jsobj['fname']
        url = jsobj['url']

        city = jsobj['fname']
        print(city)
        # Currently unused: the CDP geolocation override that consumed it is
        # disabled.  The lookup is kept so an unknown location key still fails
        # here, before the page is loaded.
        Map_coordinates = {
            "latitude": float(_CITY_COORDINATES[city][0]),
            "longitude": float(_CITY_COORDINATES[city][1]),
            "accuracy": 100,
        }
        # driver.execute_cdp_cmd("Emulation.setGeolocationOverride", Map_coordinates)

        driver.get(url)
        # Fixed waits; presumably to let results render -- TODO confirm.
        time.sleep(3)
        scrolling(driver, 10)
        time.sleep(20)

        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        time.sleep(20)

        if numresults == 0:
            # Nothing matched the result selector: notify and stop.  SystemExit
            # is not an Exception subclass, so it passes the handler below and
            # only the ``finally`` clean-up runs.
            send_msg('stop working...')
            sys.exit()

        datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': [], '結果說明': []}
        for idx, elmt in enumerate(elmts, start=1):
            href = elmt.get_attribute('href')
            txt = elmt.text
            desc = _result_description(elmt)
            # NOTE(review): 'description' stores fname (the location key), not
            # the snippet in ``desc`` -- confirm this is intended.
            table.insert({
                'title': txt,
                'url': href,
                'keyword': kw,
                'dt': datetime.datetime.now(),
                'ranking': idx,
                'description': fname,
            })
            datadict['搜尋詞'].append(kw)
            datadict['結果標題'].append(txt)
            datadict['結果網址'].append(href)
            datadict['結果名次'].append(str(idx))
            datadict['結果說明'].append(desc)

        df = pd.DataFrame(datadict)
        csv_path = '/Users/mac/Documents/wk/' + date + fname + ".csv"
        print(csv_path)
        df.to_csv(csv_path)
    except Exception:
        print('exception')
        traceback.print_exc()
    finally:
        # Release the browser on every path, including the early sys.exit().
        driver.quit()

    sys.exit()


d = {
    'ny': "https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIWTmV3IFlvcmssVW5pdGVkIFN0YXRlcw&gws_rd=cr",
    'wc': "https://www.google.com/search?q=angelo%20koo&hl=en&gl=us&num=100&uule=w+CAIQICItV2FzaGluZ3RvbixEaXN0cmljdCBvZiBDb2x1bWJpYSxVbml0ZWQgU3RhdGVz&gws_rd=cr#gws_rd=cr&ip=1",
    'miami': "https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIbTWlhbWksRmxvcmlkYSxVbml0ZWQgU3RhdGVz&gws_rd=cr",
    'chicago': "https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIeQ2hpY2FnbyxJbGxpbm9pcyxVbml0ZWQgU3RhdGVz&gws_rd=cr",
}
location = 'chicago'
run_once({'kw': 'angelo koo', 'fname': location, 'date': '0229', 'url': d[location]})

# Manual screenshots: the region must be chosen via the settings control at
# the bottom right.
ny = "https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIWTmV3IFlvcmssVW5pdGVkIFN0YXRlcw&gws_rd=cr"
wc = "https://www.google.com/search?q=angelo%20koo&hl=en&gl=us&num=100&uule=w+CAIQICItV2FzaGluZ3RvbixEaXN0cmljdCBvZiBDb2x1bWJpYSxVbml0ZWQgU3RhdGVz&gws_rd=cr#gws_rd=cr&ip=1"
miami = "https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIbTWlhbWksRmxvcmlkYSxVbml0ZWQgU3RhdGVz&gws_rd=cr"
chicago = "https://www.google.com/search?q=angelo+koo&hl=en&gl=us&num=100&uule=w+CAIQICIeQ2hpY2FnbyxJbGxpbm9pcyxVbml0ZWQgU3RhdGVz&gws_rd=cr"