|
@@ -543,7 +543,8 @@ def page_down_(driver, xpath_css, time_):
|
|
|
def main():
|
|
|
db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
|
|
|
db2 = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
|
|
|
- table2 = db2['progress_list2']
|
|
|
+
|
|
|
+ table2 = db2['swire_store_list']
|
|
|
# keyword = '麻辣火鍋'
|
|
|
# if len(sys.argv) >1:
|
|
|
# keyword=sys.argv[1]
|
|
@@ -556,11 +557,15 @@ def main():
|
|
|
os.system('sudo docker container restart p'+str(port))
|
|
|
time.sleep(8)
|
|
|
|
|
|
- for i in range(5):
|
|
|
- keyword = get_new_keyword(db2)
|
|
|
- table2.insert({'kw':keyword,'num':0})
|
|
|
- url_pd = get_not_cralwer_url(keyword)
|
|
|
- print('drvier start {}...'.format(keyword))
|
|
|
+ for i in range(10):
|
|
|
+ result = db2.query('select * from swire_store_list where check_ is null ORDER BY RAND() limit 500')
|
|
|
+ url_pd = pd.DataFrame([dict(i) for i in result])
|
|
|
+ url_pd['item_url'] = url_pd['fid'].apply(lambda x: 'https://www.google.com.tw/maps/@24.1753633,120.6747136,15z/data=!4m5!3m4!1s{}!8m2!3d24.1760271!4d120.6705323'.format(x))
|
|
|
+
|
|
|
+ # keyword = get_new_keyword(db2)
|
|
|
+ # table2.insert({'kw':keyword,'num':0})
|
|
|
+ # url_pd = get_not_cralwer_url(keyword)
|
|
|
+ # print('drvier start {}...'.format(keyword))
|
|
|
driver = brower_start(port)
|
|
|
#driver = serive_create('Profile 6')
|
|
|
#profilepath = 'Profile 1'
|
|
@@ -575,23 +580,18 @@ def main():
|
|
|
print('start...')
|
|
|
driver.get(item_url)
|
|
|
page_down_(driver, "//div[@class='x3AX1-LfntMc-header-title-ij8cu']", 3)
|
|
|
- lat, lon = find_lon_lat(driver)
|
|
|
- unique_id = get_unique_id(driver)
|
|
|
+ # lat, lon = find_lon_lat(driver)
|
|
|
+ # unique_id = get_unique_id(driver)
|
|
|
time_status = time_click(driver)
|
|
|
- if time_status == 'error' or len(unique_id) ==0:
|
|
|
- error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
|
|
|
- data_select_insert(db, 'error_list2', error_table_col, row)
|
|
|
- continue
|
|
|
time.sleep(0.5)
|
|
|
shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
|
|
|
|
output = {
|
|
|
- 'name': blank_check(shop_soup.find('h1', class_='x3AX1-LfntMc-header-title-title').text),
|
|
|
- 'lon': lon,
|
|
|
- 'lat': lat,
|
|
|
- 'unique_id': unique_id.replace('?share','')
|
|
|
+ # 'name': blank_check(shop_soup.find('h1', class_='x3AX1-LfntMc-header-title-title').text),
|
|
|
+ 'name': name,
|
|
|
+ 'fid': row['fid']
|
|
|
}
|
|
|
- print(output['name'], lon, lat, unique_id)
|
|
|
+ print(output['name'])
|
|
|
|
|
|
print('get_shop_info')
|
|
|
output = get_shop_info(driver, output, shop_soup)
|
|
@@ -624,14 +624,17 @@ def main():
|
|
|
query_name = '{}+{}'.format(output_name, output['addr'])
|
|
|
query_name = query_name.replace(' ','')
|
|
|
output['item_url'] = item_url
|
|
|
- output['keyword'] = keyword
|
|
|
+ output['keyword'] = row['keyword']
|
|
|
output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
|
|
|
data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
|
|
|
-
|
|
|
+ table2.upsert({'place_id':row['place_id'],'check_':1},['place_id'])
|
|
|
except Exception as e:
|
|
|
+ table3 = db2['error_list2']
|
|
|
+ table3.insert({'num':row['name'],'keyword':row['keyword'],'item_url':row['item_url'],'crawler_date':datetime.today().strftime("%Y/%m/%d %H:%M")})
|
|
|
print(e)
|
|
|
- error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
|
|
|
- data_select_insert(db, 'error_list2', error_table_col, row)
|
|
|
+ # error_table_col = ['name', 'keyword', 'item_url', 'crawler_date']
|
|
|
+ # db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
|
|
|
+ # data_select_insert(db, 'error_list2', error_table_col, row)
|
|
|
time.sleep(1)
|
|
|
|
|
|
|