Jared 3 lat temu
rodzic
commit
e1c01538c6
5 zmienionych plików z 311 dodań i 1 usunięć
  1. 166 0
      category.csv
  2. 116 0
      jared_shop_item_list.py
  3. 3 1
      shop_item_list.py
  4. 13 0
      utility/import_category.py
  5. 13 0
      utility/import_latlon.py

+ 166 - 0
category.csv

@@ -0,0 +1,166 @@
+美食排行(23項),33,F1
+咖啡,41,F1001
+火鍋,42,F1002
+早餐,43,F1003
+牛排,44,F1004
+麵包糕點,45,F1005
+素食,46,F1006
+吃到飽,47,F1007
+韓式,48,F1008
+日式,49,F1009
+早午餐,50,F1010
+甜點,51,F1011
+燒烤,52,F1012
+鐵板燒,53,F1013
+冰品/飲品,54,F1014
+泰式,55,F1015
+美式,56,F1016
+下午茶,57,F1017
+義式,58,F1018
+餐酒館,59,F1019
+熱炒,60,F1020
+親子餐廳,61,F1021
+輕食,62,F1022
+合菜,63,F1023
+中式,198,F1024
+在地美食(100項),34,F2
+牛肉麵,64,F2001
+便當,65,F2002
+粥,66,F2003
+雞排,67,F2004
+水餃,68,F2005
+炒飯,69,F2006
+滷味,70,F2007
+豆花,71,F2008
+臭豆腐,72,F2009
+鹹酥雞,73,F2010
+肉圓,74,F2011
+麻糬,75,F2012
+飯糰,76,F2013
+豬腳,77,F2014
+鵝肉,78,F2015
+三明治,79,F2016
+油飯,80,F2017
+鳳梨酥,81,F2018
+蔥油餅,82,F2019
+滷肉飯,83,F2020
+麻辣火鍋,84,F2021
+潤餅,85,F2022
+小籠包,86,F2023
+虱目魚,87,F2024
+蘿蔔糕,88,F2025
+生煎包,89,F2026
+蚵仔煎,90,F2027
+麻辣燙,91,F2028
+鹹水雞,92,F2029
+珍珠奶茶,93,F2030
+大腸麵線,94,F2031
+蚵仔麵線,95,F2032
+擔仔麵,96,F2033
+割包,97,F2034
+刨冰,98,F2035
+鐵蛋,99,F2036
+蔥抓餅,100,F2037
+燒仙草,101,F2038
+烤地瓜,102,F2039
+藥燉排骨,103,F2040
+皮蛋豆腐,104,F2041
+車輪餅,105,F2042
+肉羹麵,106,F2043
+大腸包小腸,107,F2044
+甜不辣,108,F2045
+愛玉,109,F2046
+魚丸湯,110,F2047
+鼎邊銼,111,F2048
+雞翅飯捲,112,F2049
+貢丸湯,113,F2050
+胡椒餅,114,F2051
+草仔粿,115,F2052
+碗粿,116,F2053
+炒麵,117,F2054
+肉粽,118,F2055
+芋圓,119,F2056
+紅豆湯,120,F2057
+綠豆湯,121,F2058
+木瓜牛奶,122,F2059
+芒果冰,123,F2060
+豬血糕,124,F2061
+竹筒飯,125,F2062
+鍋貼,126,F2063
+當歸,127,F2064
+地瓜球,128,F2065
+炒米粉,129,F2066
+酸梅湯,130,F2067
+發粿,131,F2068
+粉圓冰,132,F2069
+客家擂茶,133,F2070
+米漿,134,F2071
+烤玉米,135,F2072
+煎餃,136,F2073
+米粉湯,137,F2074
+麻辣鴨血,138,F2075
+蛋花湯,139,F2076
+炸蛋餅,140,F2077
+花生湯,141,F2078
+餛飩湯,142,F2079
+卜肉,143,F2080
+魷魚羹,144,F2081
+鐵板麵,145,F2082
+肉燥飯,146,F2083
+四神湯,147,F2084
+阿給,148,F2085
+蚵嗲,149,F2086
+炸麻糬,150,F2087
+炸熱狗,151,F2088
+牛舌餅,152,F2089
+羊肉爐,153,F2090
+泡泡冰,154,F2091
+客家小炒,155,F2092
+火雞肉飯,156,F2093
+花生糖,157,F2094
+蜜餞,158,F2095
+米苔目,159,F2096
+糕渣,160,F2097
+鱔魚意麵,161,F2098
+棺材板,162,F2099
+冬瓜茶,163,F2100
+特色餐廳(6項),35,F3
+打卡餐廳,164,F3001
+約會餐廳,165,F3002
+親子餐廳,166,F3003
+寵物餐廳,167,F3004
+景觀餐廳,168,F3005
+主題餐廳,169,F3006
+星級米其林(5項),36,F4
+三星,170,F4001
+二星,171,F4002
+一星,172,F4003
+必比登,173,F4004
+餐盤推薦,174,F4005
+飯店民宿(7項),37,H1
+五星飯店,175,H1001
+四星飯店,176,H1002
+商務飯店,177,H1003
+特色民宿,178,H1004
+青年旅館,179,H1005
+汽車旅館,180,H1006
+露營區,181,H1007
+量販超市(3項),38,S1
+量販店,182,S1001
+超市,183,S1002
+超商,184,S1003
+百貨服飾(7項),39,D1
+百貨公司,185,D1001
+女裝店,186,D1002
+男裝店,187,D1003
+童裝店,188,D1004
+嬰兒服,189,D1005
+運動用品,190,D1006
+鞋類,191,D1007
+高鐵加油(6項),40,R1
+高鐵站,192,R1001
+火車站,193,R1002
+中油,194,R1003
+台塑,195,R1004
+全國,196,R1005
+速邁樂,197,R1006

+ 116 - 0
jared_shop_item_list.py

@@ -0,0 +1,116 @@
+# -*- coding: utf-8 -*-
+from selenium import webdriver
+from selenium.webdriver.common.action_chains import ActionChains
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
+from selenium.webdriver.common.by import By
+
+from bs4 import BeautifulSoup
+
+from utility import database_access as DA
+from utility.parseutils import *
+from utility.connect import *
+import dataset
+
+from datetime import datetime
+import pandas as pd
+import time
+import json
+import re
+
+def brower_start():
+    options = webdriver.ChromeOptions()
+    browser = webdriver.Chrome(options=options)
+
+#    browser = webdriver.Remote(
+#        command_executor='http://192.53.174.202:4444/wd/hub',
+#        desired_capabilities=options.to_capabilities()
+#    )
+    return browser
+
+
+def get_url_list(driver):
+    for i in range(5, 43, 2):
+        try:
+            wait = WebDriverWait(driver, 60)
+            wait.until(
+                EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[{}]/div/a'.format(i)))
+            )
+            driver.find_element(By.XPATH,'//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[{}]/div/a'.format(i)).send_keys(Keys.DOWN)
+            time.sleep(1)
+        except:
+            pass
+    url_soup = BeautifulSoup(driver.page_source, 'html.parser')
+    url_list = []
+    for i in url_soup.find_all('a'):
+        try:
+            if i['href'].find('maps/place') != -1:
+                url_list += [[i['href'], i['aria-label']]]
+        except:
+            pass
+    
+    return url_list
+
+
+def keyin_keyword(driver, keyword):
+    button = driver.find_element_by_id("searchbox")
+    driver.implicitly_wait(30)
+    ActionChains(driver).move_to_element(button).send_keys(keyword).send_keys(Keys.RETURN).perform()
+    time.sleep(3)
+
+
+def main():
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+
+
+    print('drvier start...')
+    driver = brower_start()
+    keyword = '滷肉飯'
+    num=-1
+    cursor=db.query('select num from progress_list where kw = "'+keyword+'"')
+    for c in cursor:
+        num=c['num']
+        break
+
+    table2=db['progress_list']
+
+
+    cursor=db.query('select * from lat_lon_loc where num >= '+str(num))
+#    cursor=db.query('select * from lat_lon_loc')
+    lst=[]
+    for c in cursor:
+        lst.append({'num':c['num'],'loc':c['loc'],'lat':c['lat'],'lon':c['lon']})
+
+
+
+    for r in lst:
+
+        latitude = r['lat'] #緯度
+        longitude = r['lon'] #精度
+        table2.upsert({'kw':keyword,'num':r['num']},['kw'])
+        url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
+        driver.get(url)
+
+        keyin_keyword(driver, keyword)
+        
+        for page in range(4):
+            print( r['loc'], latitude, longitude, page)
+            url_list = get_url_list(driver)
+            print(url_list)
+            shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
+            for item in url_list:
+                result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
+                print(result)
+#                insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
+#                                .format('shop_item_list', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
+
+#                DA.mysql_insert_data(db, insert_sql)
+            
+            if page < 2 :
+                element = driver.find_element_by_id('ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
+                driver.implicitly_wait(30)
+                ActionChains(driver).move_to_element(element).click(element).perform() 
+
+if __name__ == '__main__':
+    main()

+ 3 - 1
shop_item_list.py

@@ -63,7 +63,9 @@ def main():
     print('drvier start...')
     driver = brower_start()
 
-    for keyword in ['早餐','牛排', '麵包蛋糕']:
+#    for keyword in ['碗粿','炒麵','肉粽']:
+    for keyword in ['碗粿']:
+
         for k, row in data.iterrows():
             try:
                 latitude = row['latitude'] #緯度

+ 13 - 0
utility/import_category.py

@@ -0,0 +1,13 @@
+import codecs
+import dataset
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+table=db['category']
+fr=codecs.open('category.csv','r','utf-8')
+lines=fr.readlines()
+for l in lines:
+    newl=l.strip()
+#    print(newl)
+    elmts=newl.split(',')
+    table.insert({'keyword':elmts[0],'num':elmts[1],'label':elmts[2]})
+#print(lines)
+fr.close()

+ 13 - 0
utility/import_latlon.py

@@ -0,0 +1,13 @@
+import codecs
+import dataset
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+table=db['lat_lon_loc']
+fr=codecs.open('lat_long_location.csv','r','utf-8')
+lines=fr.readlines()
+for l in lines[1:]:
+    newl=l.strip()
+#    print(newl)
+    elmts=newl.split(',')
+    table.insert({'num':elmts[0],'loc':elmts[1],'lat':elmts[2],'lon':elmts[3]})
+#print(lines)
+fr.close()