|
@@ -45,7 +45,7 @@ def brower_start(port):
|
|
|
# browser = webdriver.Chrome(options=options)
|
|
|
|
|
|
browser = webdriver.Remote(
|
|
|
- command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
|
|
|
+ command_executor='http://192.53.174.202:'+str(port)+'/wd/hub',
|
|
|
desired_capabilities=options.to_capabilities()
|
|
|
)
|
|
|
return browser
|
|
@@ -118,8 +118,13 @@ def get_intro_info(driver, output):
|
|
|
element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}簡介']".format(output['name']))
|
|
|
driver.implicitly_wait(10)
|
|
|
ActionChains(driver).move_to_element(element).click(element).perform()
|
|
|
-
|
|
|
- page_down_(driver, "//div[@class='siAUzd-neVct section-scrollbox cYB2Ge-oHo7ed cYB2Ge-ti6hGc']", 3)
|
|
|
+
|
|
|
+ # pageSource = driver.page_source
|
|
|
+ # fileToWrite = open("page_source.html", "w")
|
|
|
+ # fileToWrite.write(pageSource)
|
|
|
+ # fileToWrite.close()
|
|
|
+
|
|
|
+ page_down_(driver, '//*[@id="pane"]/div/div[1]', 3)
|
|
|
|
|
|
intro_soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
|
for key in intro_list:
|
|
@@ -217,8 +222,9 @@ def get_reviews(driver, output):
|
|
|
driver.implicitly_wait(10)
|
|
|
ActionChains(driver).move_to_element(element).click(element).perform()
|
|
|
time.sleep(0.5)
|
|
|
-
|
|
|
- page_down_(driver, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]', 5)
|
|
|
+
|
|
|
+ # page_down_(driver, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]', 5)
|
|
|
+ page_down_(driver, '//div[@class="PPCwl"]',5)
|
|
|
all_photo = driver.find_elements_by_class_name('ODSEW-ShBeI-xJzy8c-bF1uUb')
|
|
|
for ap in all_photo:
|
|
|
ap.click()
|
|
@@ -445,7 +451,6 @@ def page_down_(driver, xpath_css, time_):
|
|
|
elmt=elmts[1]
|
|
|
else:
|
|
|
elmt=elmts[0]
|
|
|
-
|
|
|
actions = ActionChains(driver)
|
|
|
actions.move_to_element(elmt).click().perform()
|
|
|
for i in range(time_):
|
|
@@ -468,11 +473,11 @@ def main():
|
|
|
port=int(sys.argv[2])
|
|
|
|
|
|
url_pd = get_not_cralwer_url(keyword)
|
|
|
- print('drvier start...')
|
|
|
+ print('drvier start {}...'.format(keyword))
|
|
|
driver = brower_start(port)
|
|
|
# driver = serive_create('Profile 1')
|
|
|
- # profilepath = 'Profile 1'
|
|
|
- # driver = serive_create_linux(profilepath)
|
|
|
+ #profilepath = 'Profile 1'
|
|
|
+ #driver = serive_create_linux(profilepath)
|
|
|
|
|
|
for key, row in url_pd.iterrows():
|
|
|
try:
|
|
@@ -520,12 +525,13 @@ def main():
|
|
|
output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
|
|
|
data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
|
|
|
|
|
|
- except:
|
|
|
+ except Exception as e:
|
|
|
+ print(e)
|
|
|
error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
|
|
|
data_select_insert(db, 'error_list', error_table_col, row)
|
|
|
driver.close()
|
|
|
driver = brower_start(port)
|
|
|
- # driver = serive_create_linux(profilepath)
|
|
|
+ driver = serive_create_linux(profilepath)
|
|
|
|
|
|
|
|
|
|