|
@@ -8,6 +8,7 @@ import urllib.parse
|
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
|
from selenium.webdriver.common.by import By
|
|
|
from selenium.webdriver.chrome.service import Service
|
|
|
+from selenium.webdriver.common.keys import Keys
|
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
|
import codecs
|
|
|
import random
|
|
@@ -22,8 +23,6 @@ import fire
|
|
|
#pymysql.install_as_MySQLdb()
|
|
|
|
|
|
|
|
|
-db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
|
|
|
-table=db['general_log']
|
|
|
driver = None
|
|
|
|
|
|
|
|
@@ -48,19 +47,25 @@ def empty_query(q):
|
|
|
|
|
|
|
|
|
def process_query(qs):
|
|
|
+ db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
|
|
|
+ table=db['general_log']
|
|
|
q=qs[0]
|
|
|
domain=qs[1]
|
|
|
global driver
|
|
|
- googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100,'zh-TW')
|
|
|
- print(googleurl)
|
|
|
+ googleurl = 'https://www.google.com/?num=100'
|
|
|
driver.get(googleurl)
|
|
|
time.sleep(6)
|
|
|
+ send_kw_elmt = driver.find_element(By.XPATH, '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
|
|
|
+ send_kw_elmt.send_keys(q)
|
|
|
+ time.sleep(3)
|
|
|
+ send_kw_elmt.send_keys(Keys.ENTER)
|
|
|
+ time.sleep(6)
|
|
|
|
|
|
- elmts=driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
|
|
|
+ elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
|
|
|
|
|
|
idx=1
|
|
|
ranking=-1
|
|
|
- print(len(elmts))
|
|
|
+ print('網頁數量',len(elmts))
|
|
|
# driver.save_screenshot('c:/tmp/test.png')
|
|
|
if 'site' in q:
|
|
|
href = elmts[0].get_attribute('href')
|
|
@@ -80,8 +85,8 @@ def process_query(qs):
|
|
|
if len(txt)>10:
|
|
|
if domain in href:
|
|
|
print('clicked....')
|
|
|
- print(href)
|
|
|
- print(txt)
|
|
|
+ print('點擊網址',href)
|
|
|
+ print('標題',txt)
|
|
|
print("ranking", idx)
|
|
|
table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
|
|
|
webdriver.ActionChains(driver).move_to_element(elmt).perform()
|
|
@@ -89,7 +94,7 @@ def process_query(qs):
|
|
|
time.sleep(5)
|
|
|
break
|
|
|
idx+=1
|
|
|
-
|
|
|
+ db.close()
|
|
|
def run_once(q):
|
|
|
global driver
|
|
|
result=[]
|
|
@@ -108,7 +113,6 @@ def run_once(q):
|
|
|
driver.delete_all_cookies()
|
|
|
driver.set_window_size(1400,1000)
|
|
|
|
|
|
- print('到此')
|
|
|
process_query(q)
|
|
|
time.sleep(3)
|
|
|
driver.quit()
|
|
@@ -126,7 +130,6 @@ def run_once(q):
|
|
|
class JParams(object):
|
|
|
|
|
|
def get(self, kw,domain,port):
|
|
|
- print('關鍵字',kw)
|
|
|
run_once( (kw,domain,port) )
|
|
|
|
|
|
|