123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120 |
- from selenium import webdriver
- import time
- import networkx as nx
- import dataset
- import pickle
- import codecs
- from selenium.webdriver.common import keys
- from selenium.webdriver.common.keys import Keys
- import sys
- import os
- import time
- import re
- import pandas as pd
- import df2sheet
- from browser_common import JBrowser
- import datetime
- import dataset
# Module-level database handle shared by the script statements further down.
db = dataset.connect("sqlite:///c:/tmp/hhh.db")
def get_designer_statistics(designer_list):
    """Search Google for each designer name and collect the ranked result links.

    For every query in ``designer_list`` the Google results page is loaded in a
    ``JBrowser`` session (profile path "Profile 7") and the anchors matched by
    the result XPath are numbered 1, 2, 3, ... in page order. Anchors whose
    visible text is 10 characters or shorter are skipped and do not consume a
    rank.

    NOTE(review): the nesting of the inner loop was reconstructed from a
    de-indented copy of this function. It is assumed that only ``hhh.com.tw``
    hits go into ``masters`` and that the rank counter advances only for
    counted results -- confirm against the original file.

    Args:
        designer_list: Iterable of search query strings (designer names).

    Returns:
        A dict with two lists of ``{'designer', 'title', 'url', 'ranking'}``
        records:

        * ``'details'`` -- one record per counted search result.
        * ``'masters'`` -- only the records whose URL contains ``hhh.com.tw``.
    """
    # Local import so the block does not depend on a file-level import change.
    from urllib.parse import quote_plus

    queries = list(designer_list)
    details = []
    masters = []
    if not queries:
        # Nothing to look up: do not start a browser session at all.
        return {'masters': masters, 'details': details}

    jb = JBrowser()
    jb.set_profile_path("Profile 7")

    for q in queries:
        # Encode the query so spaces, '&', '#' or non-ASCII names cannot
        # corrupt the URL.
        googleurl = 'https://www.google.com/search?q=' + quote_plus(q)
        jb.get(googleurl)
        driver = jb.get_driver()
        time.sleep(3)  # give the results page time to render

        # find_elements(by, value) works on both old and current Selenium;
        # the find_elements_by_* helpers were removed in Selenium 4.3.
        # "xpath" is the value of By.XPATH.
        elmts = driver.find_elements(
            "xpath", "//div[@class='g']//div[@class='yuRUbf']//a"
        )

        idx = 1
        for elmt in elmts:
            href = elmt.get_attribute('href')
            txt = elmt.text
            if len(txt) <= 10:
                # Short or empty anchors are not counted as results.
                continue

            record = {'designer': q, 'title': txt, 'url': href, 'ranking': idx}
            if 'hhh.com.tw' in href:
                # Separate dict so the two lists never alias the same object.
                masters.append(dict(record))
            print(href)
            print(txt)
            details.append(record)
            idx += 1

        time.sleep(3)  # pause between consecutive searches

    return {'masters': masters, 'details': details}
def find_master_by_designer(masters, designer):
    """Return the first record in ``masters`` whose 'designer' equals ``designer``.

    Args:
        masters: Iterable of dicts, each with a ``'designer'`` key.
        designer: Value to match against each record's ``'designer'`` entry.

    Returns:
        The first matching record, or ``None`` when no record matches.
    """
    matching = (entry for entry in masters if entry['designer'] == designer)
    return next(matching, None)
# Script body: look up the 20 names in monitor_list with the oldest `updated`
# value, collect their search results, then write one sheet per name.
cursor = db.query('select name from monitor_list order by updated asc limit 20')
qlist = [c['name'] for c in cursor]

results = get_designer_statistics(qlist)
print(results)

table = db['monitor_list']
table_details = db['table_details']

for q in qlist:
    r = find_master_by_designer(results['masters'], q)
    # find_master_by_designer returns None when no record matches; fall back
    # to -1 instead of failing on r['ranking'].
    master_ranking = r['ranking'] if r is not None else -1

    # Take the timestamp once so the sheet column and the DB row agree.
    now = datetime.datetime.now()
    # NOTE(review): the format mixes '-' and '/' separators -- confirm intended.
    dtstr = now.strftime("%Y-%m/%d %H:%M:%S")
    table.upsert({'name': q, 'updated': now}, keys=['name'])

    rows = [
        [d['designer'], d['title'], d['url'], d['ranking'], master_ranking, dtstr]
        for d in results['details']
        if d['designer'] == q
    ]
    # dtype=object keeps the cell values as plain Python objects, as in the
    # original row-by-row construction.
    df = pd.DataFrame(
        rows,
        columns=('designer', 'title', 'url', 'ranking', '幸福空間排名', '更新時間'),
        dtype=object,
    )
    df2sheet.save_sheet(df, 'designer_ranking', q, startpos='A1')
|