| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697 | 
							- import random
 
- import sys
 
- import dataset
 
- from selenium import webdriver
 
- import traceback
 
- import datetime
 
- import codecs
 
- import time
 
- import urllib
 
- import argparse
 
- import schedule
 
- import logging
 
- import sys
 
- from logging.handlers import SysLogHandler
 
- import socket
 
- import pandas as pd
 
- import socket
 
- import os
 
# NOTE: a start-up debug log call that used to sit here is commented out.
# Process id (as a string) and host name, both resolved once at import time.
pid = str(os.getpid())
hname = socket.gethostname()
 
def restart_browser():
    """Launch a new headless Chrome session and return its WebDriver.

    The window is resized to 950x6000 before the driver is handed back.
    The caller owns the returned driver and is responsible for quitting it.
    """
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.add_argument('--headless')

    browser = webdriver.Chrome(options=chrome_opts)
    # presumably this tall so one screenshot covers the whole page -- confirm
    browser.set_window_size(950, 6000)
    return browser
 
def process_one(term: str = 'tha 娛樂城',
                target_domain: str = 'taroboba-yuan.com',
                output_dir: str = 'c:/tmp/seo/') -> None:
    """Run one Google search, record the ranked results, and click the target.

    The function opens a headless browser, loads the Google result page for
    ``term`` (``num=100``, ``hl=zh-TW``), waits six seconds, saves a
    screenshot, and collects every ``//div[@class='yuRUbf']/a`` link together
    with its 1-based rank.  If a result URL contains ``target_domain`` the
    last such result is hovered and clicked.  The collected rows are written
    to an Excel file next to the screenshot.

    Args:
        term: Search phrase.  Spaces are replaced by ``_`` to build the
            output file names.
        target_domain: Substring looked for in each result URL to decide
            which result to click.
        output_dir: Directory that receives ``<term>.png`` and
            ``<term>.xls``; it is created if missing.

    Raises:
        SystemExit: When the page yields no results (``'None'`` is printed
            first).  The browser is still shut down in that case.
    """
    # Imported locally: a bare ``import urllib`` does not guarantee that the
    # ``urllib.parse`` submodule is loaded, and ``By`` is needed for the
    # locator API that replaced ``find_elements_by_xpath`` (removed in
    # Selenium 4.3; ``find_elements(by, value)`` also exists in Selenium 3).
    from urllib.parse import quote
    from selenium.webdriver.common.by import By

    print(term)
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
        quote(term), 100, 'zh-TW')
    print(googleurl)

    fname = term.replace(' ', '_')
    os.makedirs(output_dir, exist_ok=True)

    datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}

    driver = restart_browser()
    try:
        driver.get(googleurl)
        # Fixed pause to let the result page finish rendering.
        time.sleep(6)
        driver.save_screenshot(os.path.join(output_dir, fname + '.png'))

        clickelmt = None
        for elmt in driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a"):
            try:
                href = elmt.get_attribute('href')
                title = elmt.text
            except Exception:
                # Best effort: report the bad element and keep going.
                print('href2 exception')
                traceback.print_exc()
                continue
            if not href:
                # An anchor without a URL cannot be ranked or matched.
                continue
            if target_domain in href:
                # Later matches overwrite earlier ones: the last one wins.
                clickelmt = elmt
            print(href)
            print(title)
            datadict['搜尋詞'].append(term)
            datadict['結果標題'].append(title)
            datadict['結果網址'].append(href)
            # Rank counts only the results that were actually recorded.
            datadict['結果名次'].append(str(len(datadict['結果名次']) + 1))

        if clickelmt is not None:
            # Hover first, then hover-and-click, as two separate action chains.
            webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
            webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()

        if not datadict['結果標題']:
            print('None')
            sys.exit()
    finally:
        # Always release the browser, including on errors and SystemExit.
        driver.quit()

    df = pd.DataFrame(datadict)
    # NOTE(review): pandas >= 2.0 dropped the xlwt writer, so the ``.xls``
    # suffix may raise there -- confirm the pinned pandas version before
    # changing the extension, since downstream consumers may expect ``.xls``.
    df.to_excel(os.path.join(output_dir, fname + ".xls"))
 
# Run a single search pass only when this file is executed as a script, so
# that importing the module does not launch a browser as a side effect.
if __name__ == '__main__':
    process_one()
 
 
  |