|
@@ -23,7 +23,7 @@ import fire
|
|
|
|
|
|
|
|
|
import os

# Database connection used by the whole script. The URL can be supplied
# through the SEO_DB_URL environment variable; the literal below is kept only
# as a backward-compatible fallback.
# NOTE(review): the fallback embeds a real-looking credential in source
# control -- rotate it and drop the default once deployments set SEO_DB_URL.
db = dataset.connect(os.environ.get(
    'SEO_DB_URL',
    'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4',
))

# Table that receives one row per clicked search result (see process_query).
table = db['sns_log']

# Shared Selenium WebDriver handle; None until a browser session is started
# (presumably by run_once -- confirm against the full file).
driver = None
|
|
|
|
|
|
|
|
@@ -40,57 +40,47 @@ def rua():
|
|
|
return random.choice(pool)
|
|
|
|
|
|
|
|
|
-path_z = '/Users/zooeytsai/Downloads/chromedriver 2'
|
|
|
-def restart_browser():
|
|
|
- options = webdriver.ChromeOptions()
|
|
|
- options.add_argument("user-agent=%s" % rua())
|
|
|
- options.add_argument('--headless')
|
|
|
- driver=webdriver.Chrome(options=options,executable_path=path_z)
|
|
|
- driver.set_window_size(950,6000)
|
|
|
- return driver
|
|
|
def empty_query(q):
    """Load the Google results page for ``q`` and wait three seconds.

    Nothing is read from the page and nothing is returned; the call only
    navigates the module-level ``driver``.

    Args:
        q: Search text; it is percent-encoded before being put in the URL.
    """
    global driver
    encoded_term = urllib.parse.quote(q)
    search_url = 'https://www.google.com/search?q=' + encoded_term
    driver.get(search_url)
    time.sleep(3)
|
|
|
|
|
|
|
|
|
def process_query(qs):
    """Search Google for a keyword and click the result whose link is ``url``.

    Opens the results page (up to 100 results, ``hl=zh-TW``) in the
    module-level ``driver``, scans the organic result anchors in order and,
    on the first anchor whose ``href`` equals the target URL and whose text
    is longer than 10 characters, logs a row to ``table`` and clicks it.

    Args:
        qs: Sequence whose first three items are the search keyword, the
            exact result URL to look for, and the client name stored with
            the log row. Any further items are ignored here.

    Returns:
        None. Side effects only: browser navigation, an optional insert into
        ``table``, console output and fixed sleeps.
    """
    global driver
    q = qs[0]
    url = qs[1]
    client = qs[2]

    escaped_search_term = urllib.parse.quote(q)
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100, 'zh-TW')
    print(googleurl)
    driver.get(googleurl)
    time.sleep(10)  # fixed wait for the results page to finish loading

    # find_elements(by, value) is available in both Selenium 3 and 4; the
    # find_elements_by_xpath shortcut was removed in Selenium 4.3.
    elmts = driver.find_elements('xpath', "//div[@class='yuRUbf']/a")

    # NOTE(review): the counter placement was reconstructed from a listing
    # with flattened indentation -- it advances for every anchor, including
    # ones skipped for a short title. Confirm this is the intended ranking.
    idx = 1
    for elmt in elmts:
        href = elmt.get_attribute('href')
        txt = elmt.text
        if len(txt) > 10:
            if href == url:
                print('clicked....')
                print(href)
                print(txt)
                print("ranking", idx)
                table.insert({'kw': q, 'client': client, 'ranking': idx, 'title': txt, 'url': href, 'dt': datetime.datetime.now()})
                # Hover first, then hover-and-click, as two separate action
                # chains (kept exactly as in the original sequence).
                webdriver.ActionChains(driver).move_to_element(elmt).perform()
                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
                time.sleep(5)
                break
        idx += 1
|
|
|
|
|
|
def run_once(q):
|
|
|
global driver
|
|
|
- result=[]
|
|
|
s = Service('/root/driver/chromedriver')
|
|
|
user_agent = rua()
|
|
|
options = webdriver.ChromeOptions()
|
|
@@ -106,26 +96,16 @@ def run_once(q):
|
|
|
driver.delete_all_cookies()
|
|
|
driver.set_window_size(1400,1000)
|
|
|
|
|
|
- print('到此')
|
|
|
process_query(q)
|
|
|
time.sleep(3)
|
|
|
driver.quit()
|
|
|
|
|
|
|
|
|
-#for c in lst:
|
|
|
-#while True:
|
|
|
-# try:
|
|
|
-# c=random.choice(lst)
|
|
|
-# except:
|
|
|
-# traceback.print_exc()
|
|
|
-# sleepint=random.randint(320,520)
|
|
|
-# time.sleep(sleepint)
|
|
|
-
|
|
|
class JParams(object):
    """Holder for the script's command entry point.

    NOTE(review): presumably handed to ``fire`` in the ``__main__`` block,
    which would expose ``get`` as a sub-command -- confirm against the full
    file.
    """

    def get(self, kw, url, client, port):
        """Print the keyword, then run one search-and-click session.

        Args:
            kw: Search keyword.
            url: Result URL to look for.
            client: Client name forwarded with the job.
            port: Forwarded unchanged as the fourth item of the job tuple.
        """
        print('關鍵字', kw)
        job = (kw, url, client, port)
        run_once(job)
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|