|
@@ -25,7 +25,7 @@ driver = None
|
|
|
def restart_browser():
|
|
|
global driver
|
|
|
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
|
|
|
- s = Service('/Users/mac/Downloads/123/chromedriver')
|
|
|
+ s = Service('/Users/mac/Downloads/125/chromedriver')
|
|
|
options = webdriver.ChromeOptions()
|
|
|
options.add_argument('--headless')
|
|
|
# options.add_argument('--remote-debugging-port=9222')
|
|
@@ -41,34 +41,34 @@ def restart_browser():
|
|
|
return driver
|
|
|
|
|
|
def process_one():
|
|
|
- # lst=['信義房屋','信義 房屋','信義房仲','信義 房仲']
|
|
|
- # lst=["親子育兒交流 奶爸","產後瘦身益生菌","產後瘦身推薦"]
|
|
|
+ lst=['信義房屋','信義 房屋','信義房仲','信義 房仲']
|
|
|
+ # lst=["驊揚室內裝修設計","驊揚室內裝修設計 評論"]
|
|
|
# lst=['真理大學','真理大學停招','真理大學倒閉','真理大學評價','真理大學倒閉dcard']
|
|
|
- # lst = ['富玉珠寶']
|
|
|
+ # lst = ['百威旅行社','百威旅遊','百威旅行']
|
|
|
# lst = ['信義 房屋']
|
|
|
# lst = ['加百裕工業股份有限公司','加百裕','黃靖容']
|
|
|
# lst = ['富玉珠寶','富玉珠寶有限公司']
|
|
|
# lst = ['台北牙周病醫生推薦 ','微創水雷射','水雷射牙周病']
|
|
|
# lst = ['真理大學 site:ptt.cc','真理大學 site:dcard.tw']
|
|
|
# lst = ['涼夏清爽','夏季口罩','石墨烯','透氣 口罩','夏天口罩推薦','夏天立體口罩']
|
|
|
- lst = ['啟翔輕金屬','啟翔','陳百欽']
|
|
|
- date='0321'
|
|
|
+ # lst = ['啟翔輕金屬','啟翔','陳百欽']
|
|
|
+ date='0529'
|
|
|
|
|
|
for term in lst:
|
|
|
driver=restart_browser()
|
|
|
escaped_search_term=urllib.parse.quote(term)
|
|
|
- googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(escaped_search_term, 100,'zh-TW')
|
|
|
- # googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw&tbm=vid&tbs=vd:m'.format(urllib.parse.quote(term), 100, 'zh-TW')
|
|
|
+ # googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(escaped_search_term, 100,'zh-TW')
|
|
|
+ googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw&tbm=vid&tbs=vd:m'.format(urllib.parse.quote(term), 100, 'zh-TW')
|
|
|
|
|
|
driver.get(googleurl)
|
|
|
time.sleep(6)
|
|
|
print(driver.current_url)
|
|
|
|
|
|
- driver.save_screenshot('/Users/mac/Downloads/'+date+term+'.png')
|
|
|
- # driver.save_screenshot('/Users/mac/Downloads/'+date+term+"_vi"+'.png')
|
|
|
+ # driver.save_screenshot('/Users/mac/Downloads/'+date+term+'.png')
|
|
|
+ driver.save_screenshot('/Users/mac/Downloads/'+date+term+"_vi"+'.png')
|
|
|
df=pd.DataFrame()
|
|
|
- elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']//a")
|
|
|
- # elmts=driver.find_elements(By.XPATH,"//div[@class='xe8e1b']//a")
|
|
|
+ # elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']//a")
|
|
|
+ elmts=driver.find_elements(By.XPATH,"//div[@class='xe8e1b']//a")
|
|
|
print(len(elmts))
|
|
|
cnt=1
|
|
|
datadict={'搜尋詞':[],'結果標題':[],'結果網址':[],'結果名次':[]}
|
|
@@ -95,8 +95,8 @@ def process_one():
|
|
|
df['結果網址']=datadict['結果網址']
|
|
|
df['結果名次']=datadict['結果名次']
|
|
|
|
|
|
- df.to_csv('/Users/mac/Downloads/'+date+term+".csv", encoding='utf-8')
|
|
|
- # df.to_csv('/Users/mac/Downloads/'+date+term+"_vi"+".csv")
|
|
|
+ # df.to_csv('/Users/mac/Downloads/'+date+term+".csv", encoding='utf-8')
|
|
|
+ df.to_csv('/Users/mac/Downloads/'+date+term+"_vi"+".csv")
|
|
|
|
|
|
driver.quit()
|
|
|
print('等待')
|