noodles 3 years ago
parent
commit
17ef1a309e
1 changed file with 38 additions and 13 deletions

run.py  +38 −13

@@ -113,16 +113,15 @@ def get_intro_info(driver, output):
             break
     
     intro_soup = BeautifulSoup(driver.page_source, 'html.parser')
-    
     for key in intro_list:
         elements = intro_soup.find('div',{'aria-label':key})
         if elements:
             element = elements.find_all('li',{'class':'LQjNnc-p83tee-JNdkSc-ibnC6b'})
-            # print(element)
             count = 0
             tmp = []
             for ele in element:
-                if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/2x/check_black_18dp.png"}):
+                # if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/2x/check_black_18dp.png"}):
+                if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/1x/check_black_18dp.png"}):
                     tmp += [{
                         'id':count,
                         intro_list[key][1]: blank_check(ele.text)
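
This hunk swaps the hard-coded checkmark icon from the 2x asset URL to the 1x one, keeping the old URL as a comment. A sketch of a variant that tolerates either density (has_check_icon is hypothetical, not in the commit):

```python
# Sketch only: match the icon by filename so both 1x and 2x variants pass.
def has_check_icon(li_tag):
    img = li_tag.find('img', src=lambda s: s and 'check_black_18dp.png' in s)
    return img is not None
```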
@@ -208,7 +207,7 @@ def get_reviews(driver, output):
     element = driver.find_element_by_css_selector(more_reviews_css)
     driver.implicitly_wait(20)
     ActionChains(driver).move_to_element(element).click(element).perform()
-    time.sleep(2)
+    time.sleep(1)
 
     all_photo = driver.find_elements_by_class_name('ODSEW-ShBeI-xJzy8c-bF1uUb')
     for ap in all_photo:
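
Both sleep changes in this commit (here and the 1 s pause added in find_photo_list below) tune fixed delays by hand. A hedged alternative, assuming the same review class name used two lines above: let Selenium poll until the elements actually exist.

```python
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Wait until at least one review card is attached instead of sleeping a
# fixed interval; times out after 10 s rather than silently under-waiting.
WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located(
        (By.CLASS_NAME, 'ODSEW-ShBeI-xJzy8c-bF1uUb')))
```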
@@ -279,7 +278,7 @@ def find_photo_list(driver):
             actions.move_to_element(element).perform()
         except:
             break
-
+    time.sleep(1)
     photo_soup = BeautifulSoup(driver.page_source, 'html.parser')
     photo_url = []
     for photo_id in count_list:
@@ -354,7 +353,10 @@ def data_select_insert(db, table_name, table_col, data):
     for name_ in table_col:
         if name_ == 'crawler_date':
             continue
-        tmp += [data[name_]]
+        if name_ == 'lon' or name_ == 'lat':
+            tmp += [float(data[name_])]
+        else:
+            tmp += [data[name_]]
 
     tmp += [datetime.today().strftime("%Y/%m/%d %H:%M")]
 
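data_select_insert now casts lon and lat to float so the database receives numbers rather than scraped strings. A defensive variant of the same loop (coerce_row is a hypothetical helper; assumes data behaves like a dict):

```python
def coerce_row(data, table_col):
    # Same shape as the committed loop, but an empty coordinate becomes
    # None/NULL instead of raising from float('') mid-insert.
    row = []
    for name in table_col:
        if name == 'crawler_date':
            continue
        if name in ('lon', 'lat'):
            row.append(float(data[name]) if data.get(name) else None)
        else:
            row.append(data[name])
    return row
```
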
@@ -400,21 +402,46 @@ def get_not_cralwer_url(keyword):
     return url_pd
 
 
+def serive_create_linux(profilepath):
+    option = webdriver.ChromeOptions()
+    option.add_argument('--headless')
+    option.add_argument('--no-sandbox')
+    option.add_argument('--disable-web-security')
+    option.add_argument('--allow-running-insecure-content')
+    option.add_argument('--incognito')
+    option.add_argument(
+        'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0')
+    # option.add_argument("--user-data-dir=C:\\Users\\noodles\\AppData\\Local\\Google\\Chrome\\User Data")
+    option.add_argument(
+        "--user-data-dir=/home/noodlesloves/.config/google-chrome/")
+    option.add_argument("profile-directory="+profilepath)
+    driver = webdriver.Chrome('utility/chromedriver', options=option)
+    # driver = webdriver.Chrome(executable_path='/usr/bin/chromedriver', chrome_options=option,
+    #                           service_args=['--verbose', '--log-path=/tmp/chromedriver.log'])
+
+    executor_url = driver.command_executor._url
+    session_id = driver.session_id
+    print(session_id)
+    print(executor_url)
+
+    return driver
+
 def main():
-    # driver = serive_create('Profile 1')
     keyword = '咖啡'
     db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
     url_pd = get_not_cralwer_url(keyword)
 
     print('drvier start...')
     driver = brower_start()
+
+    # driver = serive_create('Profile 1')
+    # profilepath = 'Profile 1'
+    # driver = serive_create_linux(profilepath)
     
     for key, row in url_pd.iterrows():
         try:    
             name = row['name']
             item_url = row['item_url']
-            # result = DA.mysql_select_data(db, 'select item_url from shop_list where item_url="{}"'.format(item_url))
-            # if len(result) != 0: continue
             print(key, name, ': ' ,item_url)
 
             driver.get(item_url)
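
The bulk of this commit is the new serive_create_linux factory above: headless Chrome on Linux, reusing a local browser profile, with the session id and executor URL printed so the session can be found again later. A usage sketch; 'Profile 1' is an assumption and must match a directory under ~/.config/google-chrome/:

```python
# Sketch only: drive the headless browser created by serive_create_linux.
driver = serive_create_linux('Profile 1')  # assumed profile name
try:
    driver.get('https://www.google.com/maps')
finally:
    driver.quit()
```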
@@ -425,7 +452,7 @@ def main():
                 time.sleep(0.5)
             print('start...')
             time_status = time_click(driver)
-            time.sleep(1)
+            time.sleep(0.5)
             shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
 
             output = {
@@ -449,16 +476,14 @@ def main():
             output['item_url'] = item_url
             output['keyword'] = keyword
             output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
-            time.sleep(1)
             data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
 
         except:
             error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
             data_select_insert(db, 'error_list', error_table_col, row)
-            # error = pd.DataFrame([row])
-            # error.to_csv('error.csv', mode='a', header = False)
             driver.close()
             driver = brower_start()
+            # driver = serive_create_linux(profilepath)
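
The loop's recovery path, condensed: any per-row failure logs the row to error_list, recycles the browser, and moves on. Skeleton only (crawl_one is a hypothetical stand-in for the per-shop scraping body above):

```python
for key, row in url_pd.iterrows():
    try:
        crawl_one(driver, row)  # hypothetical: the per-shop scrape
    except Exception:
        # Record the failing row, then start a fresh browser for the next.
        data_select_insert(db, 'error_list', error_table_col, row)
        driver.close()
        driver = brower_start()
```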