| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127 | 
import codecs
import os
import pickle
import re
import sys
import time
from urllib.parse import quote_plus

import dataset
import jieba
import jinja2
import networkx as nx
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common import keys
from selenium.webdriver.common.keys import Keys

import df2sheet
from browser_common import JBrowser
 
# Scratch database: an in-memory SQLite instance.  The file-backed URL that
# was used previously is kept here for reference.
# db = dataset.connect('sqlite:///c:/tmp/gspace.db')
db = dataset.connect('sqlite:///:memory:')

# Table that receives one row per token produced by the segmenter.
table = db['tmp']

# Disabled cleanup statements for other tables, kept for reference.
# db.query('delete from ranking')
# db.query('delete from hhh_ranking')

# Load the custom jieba user dictionaries, in this order.
for dict_path in (
    'C:\\src\\farmcodes\\jared\\browser\\dict.txt',
    'C:\\tmp\\dict.txt',
):
    jieba.load_userdict(dict_path)

# Browser wrapper; "Profile 7" is the browser profile it is pointed at.
jb = JBrowser()
jb.set_profile_path("Profile 7")
 
# Search keyword for this run.  Keywords used in earlier runs (swap one in
# by assigning it to `kw`):
#   '收納櫃', '收納櫃推薦', '收納櫃ikea', '收納櫃塑膠', '收納櫃設計', '收納',
#   '系統櫃', '抽屜', '系統櫃推薦', '系統櫃價格', '系統櫃廠商', '系統櫃ptt',
#   '系統櫃材質', '系統櫃樣式', '系統櫃品牌', '系統櫃衣櫃尺寸', '系統櫃板材',
#   '合砌設計', '富億空間'
kw = '空間 設計'

# URL-encode the keyword before placing it in the query string so that
# spaces, non-ASCII text and reserved characters such as '&', '#' or '+'
# are sent as part of the search term instead of altering the URL.
googleurl = 'https://www.google.com/search?q=' + quote_plus(kw)

# Open the first results page in the browser.
jb.get(googleurl)
 
# Visit up to three result pages.  For every result link, read the text of
# its surrounding container, segment it with jieba and store each token as
# one row in `table`.
for i in range(3):
    driver = jb.get_driver()
    # Fixed pause before querying the page (presumably to let it finish
    # loading -- there is no explicit wait condition).
    time.sleep(3)
    elmts = driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")

    # 1-based position of the result on the current page.  It is not used
    # inside this loop but is left defined at module level because code
    # further down the script may read it.
    idx = 1
    for elmt in elmts:
        try:
            # Three levels up from the link, then the first child <div>;
            # its text is what gets segmented.
            elmt2 = elmt.find_element_by_xpath("../../../div")
            fulldesc = elmt2.text
            print(fulldesc)
            for s in jieba.cut(fulldesc, cut_all=True):
                print(s)
                table.insert({'kw': s})
        except Exception as exc:
            # Best effort: a result that cannot be read is skipped, but the
            # reason is reported and Ctrl-C / SystemExit still propagate.
            print('except', exc)
        idx += 1

    # Use the plural lookup so a missing "next" link yields an empty list
    # instead of raising, which would abort the script before the collected
    # tokens are exported.
    next_links = driver.find_elements_by_xpath("//a[@id='pnnext']")
    if not next_links:
        break
    elmt = next_links[0]
    # Move to the link first, then move and click.
    webdriver.ActionChains(driver).move_to_element(elmt).perform()
    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
    time.sleep(1)
 
-     
 
- ##    time.sleep(9999)
 
- #    time.sleep(2)
 
- #from jinja2 import Environment, FileSystemLoader
 
- #THIS_DIR = os.path.dirname(os.path.abspath(__file__))
 
- #j2_env = Environment(loader=FileSystemLoader('c:/tmp'))
 
- #'http://icons-for-free.com/free-icons/png/128/1312099.png'
 
# Count how often each token was stored, most frequent first, echo the
# counts to stdout and write them to a UTF-8 CSV file.
cursor = db.query('select kw as kw,count(*) as cnt from tmp group by kw having count(*)>=1 order by count(*) desc')

# `data` keeps the rows as plain dicts with keys 'kw' and 'cnt'.
data = []
for c in cursor:
    print(c['kw'])
    print(c['cnt'])
    data.append({'kw': c['kw'], 'cnt': c['cnt']})

# Build the frame in one step rather than appending row by row; this also
# removes the dependence on a counter left over from earlier code.
df = pd.DataFrame(
    [(row['kw'], row['cnt']) for row in data],
    columns=['keywords', 'cnt'],
)
content = df.to_csv(index=False)

# codecs.open is kept (instead of the built-in open) so the CSV text is
# written without newline translation; the `with` block guarantees the
# handle is closed even if the write fails.
with codecs.open('c:/tmp/exp.csv', 'w', 'utf-8') as fw:
    fw.write(content)
 
- #df2sheet.save_sheet(df,'May-Event',kw,startpos='A1')
 
- #output=j2_env.get_template('hhh_kw.tmpl').render(data=data)
 
- #import codecs
 
- #fw=codecs.open('c:/tmp/hhh_kw.html', 'w','utf-8')
 
- #fw.write(output)
 
- #fw.close()
 
 
  |