# Scrape Google search results for a keyword and store each hit (title, URL, keyword) in the `gap20v6` table.
import codecs
import datetime
import os
import pickle
import re
import sys
import time
from typing import NoReturn
from urllib.parse import quote_plus

import dataset
import networkx as nx
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common import keys
from selenium.webdriver.common.keys import Keys

import df2sheet
from browser_common import JBrowser
# Database handle shared by the whole module.
# The connection URL can be overridden with the HHH_DB_URL environment
# variable; when it is unset the original hard-coded URL is used, so default
# behaviour is unchanged.
# NOTE(review): the fallback URL embeds a user name and password in source
# control -- rotate the credential and drop the fallback once every deployment
# sets HHH_DB_URL.
db = dataset.connect(
    os.environ.get(
        'HHH_DB_URL',
        'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4',
    )
)

# Table that get_designer_statistics() inserts one row per search hit into.
table = db['gap20v6']
def find_master_by_designer(masters, designer):
    """Return the first entry of *masters* whose ``'designer'`` equals *designer*.

    Args:
        masters: Iterable of mappings, each indexable by the key
            ``'designer'``. ``None`` is treated as an empty collection.
        designer: Value compared (with ``==``) against each entry's
            ``'designer'`` field.

    Returns:
        The first matching entry (the same object, not a copy), or ``None``
        when nothing matches.

    Raises:
        KeyError: If an entry inspected before a match has no ``'designer'``
            key.
    """
    if masters is None:
        return None
    for master in masters:
        if master['designer'] == designer:
            return master
    return None
def get_designer_statistics(kw):
    """Run a Google search for *kw* and record every result that is found.

    The search page is opened through a ``JBrowser`` session using the browser
    profile ``"Profile 7"``. For each result located by the XPath below, the
    title, the anchor's visible text and the anchor's ``href`` are printed and
    a row ``{'title', 'href', 'kw'}`` is inserted into the module-level
    ``table``. The stored ``kw`` has the literal suffix `` site:hhh.com.tw``
    removed.

    Args:
        kw: Search query text. It is percent-encoded before being placed in
            the URL, so spaces, non-ASCII characters and ``:`` are safe.

    Returns:
        dict: Keys ``'com100'``, ``'searchome'`` and ``'hhh'``. Each value is
        the 1-based position (counting only results whose anchor text does not
        contain ``google.com``) of the *last* result whose anchor text contains
        ``100.com``, ``searchome`` or ``hhh.com.tw`` respectively, or ``-1``
        when no such result was seen. Earlier versions returned ``None``.
    """
    jb = JBrowser()
    jb.set_profile_path("Profile 7")

    # Percent-encode the query instead of concatenating it raw into the URL.
    jb.get('https://www.google.com/search?q=' + quote_plus(kw))
    driver = jb.get_driver()
    time.sleep(3)  # fixed pause before reading the page, as in the original

    # Locator-string form of find_element(s): the find_element(s)_by_xpath
    # helpers were removed in Selenium 4, while this form works in 3 and 4.
    # NOTE(review): the class names below are presumably Google's markup for
    # the result URL breadcrumb -- they change over time; confirm they match.
    cites = driver.find_elements("xpath", "//div[@class='TbwUpd NJjxre']/cite")

    stored_kw = kw.replace(' site:hhh.com.tw', '')
    positions = {'com100': -1, 'searchome': -1, 'hhh': -1}
    idx = 1
    for cite in cites:
        title = cite.find_element("xpath", "../..//h3").text
        anchor = cite.find_element("xpath", "../..")
        anchor_text = anchor.text
        url = anchor.get_attribute('href')

        print(title)
        print(anchor_text)
        print(url)
        table.insert({'title': title, 'href': url, 'kw': stored_kw})

        # NOTE(review): site matching is done on the anchor's visible text,
        # not on its href attribute, exactly as before -- confirm intended.
        if 'google.com' in anchor_text:
            continue  # skipped results do not consume a position

        if '100.com' in anchor_text:
            positions['com100'] = idx
        if 'searchome' in anchor_text:
            positions['searchome'] = idx
        if 'hhh.com.tw' in anchor_text:
            # Previously this branch only printed; the position is now kept.
            positions['hhh'] = idx
            print(anchor_text)
        print(anchor_text)
        print(title)
        idx += 1

    time.sleep(3)
    return positions
# Module-level list; nothing visible in this file reads or fills it.
qlist = list()

# Run one scrape at import/run time, with the query limited to hhh.com.tw.
get_designer_statistics(kw="電視 牆 收納 櫃 site:hhh.com.tw")