12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697 |
- import random
- import sys
- import dataset
- from selenium import webdriver
- import traceback
- import datetime
- import codecs
- import time
- import urllib
- import argparse
- import schedule
- import logging
- import sys
- from logging.handlers import SysLogHandler
- import socket
- import pandas as pd
- import socket
- import os
- #logger.debug('[clickbot_100][清原]begin')
# Process id (as a string) and host name of the machine running this script.
pid = str(os.getpid())
hname = socket.gethostname()
def restart_browser():
    """Start a new headless Chrome session and return its WebDriver.

    The browser window is resized to 950x6000 pixels before the driver is
    handed back to the caller, who is responsible for calling ``quit()``.
    """
    chrome_opts = webdriver.ChromeOptions()
    chrome_opts.add_argument('--headless')
    browser = webdriver.Chrome(options=chrome_opts)
    # NOTE(review): the very tall window presumably lets a single screenshot
    # cover a long results page -- confirm.
    browser.set_window_size(950, 6000)
    return browser
def process_one(term='tha 娛樂城', target_domain='taroboba-yuan.com',
                output_dir='c:/tmp/seo/'):
    """Search Google for *term*, record the results and click the target site.

    A headless browser loads the first 100 Google results (zh-TW locale),
    saves a screenshot, collects every result link (title, URL, rank), clicks
    the last result whose URL contains *target_domain* if there is one, and
    writes the collected rows to an Excel file.

    Args:
        term: Search phrase. Spaces are replaced by underscores to build the
            screenshot and spreadsheet file names.
        target_domain: Substring of the result URL that marks the link to
            click.
        output_dir: Directory receiving ``<term>.png`` and ``<term>.xls``.

    Raises:
        SystemExit: When no result could be collected (after printing
            ``None``); the browser is still shut down first.
    """
    # Imported here because a bare ``import urllib`` does not guarantee that
    # the ``urllib.parse`` submodule has been loaded.
    import urllib.parse
    from selenium.webdriver.common.by import By

    print(term)
    escaped_search_term = urllib.parse.quote(term)
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
        escaped_search_term, 100, 'zh-TW')
    fname = term.replace(' ', '_')
    datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}

    driver = restart_browser()
    try:
        print(googleurl)
        driver.get(googleurl)
        # Fixed pause so the results page can finish rendering.
        time.sleep(6)
        driver.save_screenshot(os.path.join(output_dir, fname + '.png'))

        # ``find_elements_by_xpath`` no longer exists in Selenium 4.3+;
        # ``find_elements(By.XPATH, ...)`` works on old and new releases.
        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
        clickelmt = None
        cnt = 1
        for elmt in elmts:
            try:
                # Read both values before touching ``datadict`` so a failure
                # can never leave the columns with different lengths.
                href = elmt.get_attribute('href')
                title = elmt.text
                if target_domain in href:
                    clickelmt = elmt
                print(href)
                print(title)
            except Exception:
                # Best effort: skip an unreadable result but keep going.
                print('href2 exception')
                traceback.print_exc()
                continue
            datadict['搜尋詞'].append(term)
            datadict['結果標題'].append(title)
            datadict['結果網址'].append(href)
            datadict['結果名次'].append(str(cnt))
            cnt += 1

        if clickelmt is not None:
            # Hover first, then hover-and-click, as two separate action chains.
            webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
            webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()

        if not datadict['結果標題']:
            print('None')
            sys.exit()

        # NOTE(review): recent pandas releases may no longer be able to write
        # the legacy ``.xls`` format -- confirm against the installed version.
        df = pd.DataFrame(datadict)
        df.to_excel(os.path.join(output_dir, fname + ".xls"))
    finally:
        # Always release the browser, including on errors and on sys.exit().
        driver.quit()
# Run one search-and-record pass as soon as this module is loaded.
process_one()
|