|
@@ -0,0 +1,217 @@
|
|
|
|
+# import redis
|
|
|
|
+import time
|
|
|
|
+import traceback
|
|
|
|
+# import json
|
|
|
|
+from selenium import webdriver
|
|
|
|
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
|
|
+import time
|
|
|
|
+import os
|
|
|
|
+from selenium.webdriver.support.ui import WebDriverWait
|
|
|
|
+from selenium.webdriver.common.by import By
|
|
|
|
+from selenium.webdriver.support import expected_conditions as EC
|
|
|
|
+import dataset
|
|
|
|
+from selenium.webdriver.common.keys import Keys
|
|
|
|
+import json
|
|
|
|
+import random
|
|
|
|
+import time
|
|
|
|
+import redis
|
|
|
|
+import sys
|
|
|
|
+import codecs
|
|
|
|
+import pandas as pd
|
|
|
|
+import random
|
|
|
|
+import os
|
|
|
|
+import time
|
|
|
|
+import datetime
|
|
|
|
+from selenium.webdriver.chrome.service import Service
|
|
|
|
+import dataset
|
|
|
|
+import pymysql
|
|
|
|
+
|
|
|
|
+pymysql.install_as_MySQLdb()
|
|
|
|
+
|
|
|
|
+from userAgentRandomizer import userAgents
|
|
|
|
+import requests
|
|
|
|
+
|
|
|
|
# Shared Selenium driver; starts out unset.
driver = None

# NOTE(review): the database password and the LINE Notify token below are
# committed in plain text. They remain as fallbacks so existing deployments
# keep working, but they should be rotated and supplied only through the
# SEO_DB_URL / LINE_NOTIFY_TOKEN environment variables.
db = dataset.connect(
    os.environ.get(
        'SEO_DB_URL',
        'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4',
    )
)

# HTTP headers sent with every LINE Notify request.
headers = {
    "Authorization": "Bearer " + os.environ.get(
        "LINE_NOTIFY_TOKEN",
        "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
    ),
    "Content-Type": "application/x-www-form-urlencoded"
}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def send_msg(kw):
    """Send ``kw`` as a LINE Notify message.

    Posts to the LINE Notify endpoint using the module-level ``headers``.
    A non-success HTTP status is printed so that a rejected notification
    does not go unnoticed.

    Args:
        kw: Text of the message to send.

    Raises:
        requests.RequestException: On network failure, including when the
            request exceeds the 10 second timeout.
    """
    params = {"message": kw}
    # Without a timeout a stalled connection would block the script forever.
    r = requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params=params,
        timeout=10,
    )
    if not r.ok:
        print('LINE notify failed:', r.status_code, r.text)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def re_get_webdriver():
    """(Re)create the module-level Chrome ``driver``.

    An existing driver is quit first and leftover Chrome processes are
    killed with ``killall chrome``. On success the global ``driver`` holds a
    fresh incognito Chrome session with cookies cleared and a 950x20000
    window. On any failure the error is printed, a partially created browser
    is closed, and ``driver`` is left as ``None``.

    Returns:
        None. Callers inspect the global ``driver`` to learn the outcome.
    """
    global driver

    if driver is not None:
        print('closing....')
        try:
            driver.quit()
        except Exception:
            # A crashed or disconnected session must not prevent us from
            # starting a replacement.
            traceback.print_exc()
        os.system('killall chrome')
        print('quit....')
        driver = None

    new_driver = None
    try:
        ua = userAgents()
        user_agent = ua.random()

        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        # options.add_argument("--headless")
        # NOTE(review): the random user agent is only printed; it is never
        # applied to ``options``. Confirm whether that is intended.
        print(user_agent)
        options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})
        options.add_argument("--incognito")

        if os.name == 'nt':
            new_driver = webdriver.Chrome(options=options)
        else:
            # NOTE(review): this is a Windows-style path used on the
            # non-Windows branch, and ``executable_path`` is not accepted by
            # recent Selenium 4 releases - verify against the deployed setup.
            new_driver = webdriver.Chrome(
                executable_path=r'C:\Users\Administrator\Downloads\chromedriver_108\chromedriver',
                options=options,
            )

        new_driver.delete_all_cookies()
        new_driver.set_window_size(950, 20000)
    except Exception:
        traceback.print_exc()
        if new_driver is not None:
            # Do not leak a browser that started but could not be configured.
            try:
                new_driver.quit()
            except Exception:
                traceback.print_exc()
        driver = None
        return None

    driver = new_driver
    return None
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def scrolling(driver, pgnum):
    """Press PAGE_DOWN on the page body ``pgnum`` times.

    When more than one page-down is requested, a 0.3 second pause follows
    each key press.

    Args:
        driver: Selenium WebDriver whose current page is scrolled.
        pgnum: Number of PAGE_DOWN key presses to send.
    """
    body = driver.find_element("css selector", 'body')
    pause_after_each = pgnum > 1
    for _ in range(pgnum):
        body.send_keys(Keys.PAGE_DOWN)
        if pause_after_each:
            time.sleep(0.3)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def run_once(jsobj):
    """Open a search-results page, click the target domain's link and log it.

    Loads ``jsobj['url']``, scrolls the page, collects the result links
    matched by ``//div[@class='yuRUbf']/a`` and clicks the first link whose
    href contains the target domain. The click is recorded in the
    ``nda_log`` table together with its 1-based ranking. If no result links
    are found, a LINE notification is sent instead. In every case the
    browser is closed and the process exits afterwards.

    Args:
        jsobj: Dict with the keys ``'kw'`` (search phrase, used for logging)
            and ``'url'`` (page to open). An optional ``'domain'`` key names
            the substring to look for in result hrefs; when absent, the
            module-level ``domain`` variable is used.

    Returns:
        None when no browser could be started. Otherwise the function does
        not return: it ends the process with ``sys.exit()``.
    """
    global driver

    table = db['nda_log']
    print(jsobj)

    if driver is None:
        time.sleep(8)
        re_get_webdriver()
    if driver is None:
        return

    try:
        kw = jsobj['kw']
        url = jsobj['url']
        # Prefer an explicitly passed domain; fall back to the module-level
        # value that the surrounding script sets before calling us.
        target_domain = jsobj.get('domain')
        if target_domain is None:
            target_domain = globals()['domain']

        driver.get(url)
        time.sleep(6)
        scrolling(driver, 10)
        time.sleep(20)
        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        time.sleep(20)

        if numresults == 0:
            send_msg('stop working...')
        else:
            for idx, elmt in enumerate(elmts, start=1):
                href = elmt.get_attribute('href')
                # Anchors without an href cannot match and would otherwise
                # break the substring test.
                if not href or target_domain not in href:
                    continue

                txt = elmt.text
                print(href)
                print("ranking", idx)

                webdriver.ActionChains(driver).move_to_element(elmt).perform()
                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
                print('clicked....')
                table.insert({'ranking': idx, 'kw': kw, 'results': numresults, 'url': href, 'title': txt,
                              'dt': datetime.datetime.now(), 'client': 'i5'})
                time.sleep(6)
                db.close()
                # The click navigates away from the results page, so the
                # remaining elements are stale; stop after the first match.
                break
    except Exception:
        print('exception')
        traceback.print_exc()
    finally:
        # Always release the browser, even on unexpected errors.
        try:
            driver.quit()
        except Exception:
            traceback.print_exc()
        driver = None

    sys.exit()
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
from urllib.parse import quote_plus

# Values for Google's ``uule`` query parameter; one is picked at random per
# run. Each entry already ends with ``&gws_rd=cr``.
# NOTE(review): the encoded payloads appear to name New York, Washington,
# Miami and Chicago - confirm before relying on that.
city_lst = ["w+CAIQICIWTmV3IFlvcmssVW5pdGVkIFN0YXRlcw&gws_rd=cr",
            "w+CAIQICIYV2FzaGluZ3RvbixVbml0ZWQgU3RhdGVz&gws_rd=cr",
            "w+CAIQICIbTWlhbWksRmxvcmlkYSxVbml0ZWQgU3RhdGVz&gws_rd=cr",
            "w+CAIQICIeQ2hpY2FnbyxJbGxpbm9pcyxVbml0ZWQgU3RhdGVz&gws_rd=cr"]

# Pick one random job definition for this customer/plan.
cursor = db.query('select json from seo_jobs where cust="美東" and plan="形象SEO" order by rand() limit 1')
js = None
for c in cursor:
    js = json.loads(c['json'])

if js is None:
    # Without a job row none of the settings below exist; stop with a clear
    # message instead of failing later on an undefined name.
    sys.exit('no seo_jobs row found for cust="美東" and plan="形象SEO"')

prefix = js['prefix']
postfix = js['postfix']
domain = js['domain'][0]
positive = js['positive']
rnd = js['rnd']

# Build the search phrase: fixed prefix plus one random term from each list.
kw1 = random.choice(positive)
kw2 = random.choice(rnd)
kw = prefix + " " + kw1 + " " + kw2
city = random.choice(city_lst)
# The keyword is URL-encoded so spaces, non-ASCII text and reserved
# characters such as '&' cannot corrupt the query string. ``city`` already
# supplies ``&gws_rd=cr``, so it is not appended a second time.
url = f"https://www.google.com/search?q={quote_plus(kw)}&hl=en&gl=us&num=100&uule={city}"
run_once({'kw': kw, 'url': url, 'domain': domain})
|