|
@@ -0,0 +1,108 @@
|
|
|
import csv
from time import sleep

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
|
|
|
# Keyword typed into the Facebook page search.
keyword = '科技'
# Cap handed to scroll_to_end(): feeds with too many posts never finish
# scrolling, so scrolling is cut off after this many page-height changes
# (a count, not a duration; 0 disables the cap).
max_scroll_time = 1
|
|
|
+
|
|
|
def scroll_to_end(cut):
    """Keep scrolling the current page to the bottom until it stops growing.

    Uses the module-level ``driver``. Each round jumps to the bottom of the
    document, waits half a second, and re-reads the document height; the loop
    ends as soon as the height is unchanged.

    Args:
        cut: Scroll budget. ``0`` means unlimited; otherwise the loop also
            ends once more than ``cut`` height changes have been seen.
    """
    pause_seconds = 0.5
    read_height = "return document.body.scrollHeight"

    previous_height = driver.execute_script(read_height)
    growth_count = 0
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(pause_seconds)

        current_height = driver.execute_script(read_height)
        if current_height == previous_height:
            # Nothing new was appended: the bottom has been reached.
            break

        growth_count += 1
        if cut != 0 and growth_count > cut:
            # Budget exhausted; stop even though the page is still growing.
            break
        previous_height = current_height
|
|
|
+
|
|
|
def set_profile_path(profilepath):
    """Launch Chrome against an existing user-data directory.

    Args:
        profilepath: Path of the Chrome user-data directory to reuse; a
            trailing backslash is appended before it is passed to Chrome.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    chrome_options = webdriver.ChromeOptions()
    launch_flags = (
        '--disable-web-security',
        '--allow-running-insecure-content',
        f"--user-data-dir={profilepath}\\",
    )
    for flag in launch_flags:
        chrome_options.add_argument(flag)
    return webdriver.Chrome(options=chrome_options)
|
|
|
+
|
|
|
# Scrape workflow: search Facebook pages for `keyword`, visit every page found,
# collect its posts, and dump everything to out.csv.
#
# NOTE(review): the CSS class names in the XPath selectors below look
# auto-generated by Facebook and will presumably need refreshing whenever the
# site markup changes.


def _split_counts(footer_lines):
    """Pick the share and comment labels out of a post's footer text lines.

    Both counters live in the same element: a line containing '分享' is taken
    as the share label, any other line as the comment label (the last match
    of each kind wins).

    Args:
        footer_lines: Text lines of the post's share/comment footer.

    Returns:
        A ``(shares, comments)`` tuple; a counter that is missing or blank is
        reported as ``0``.
    """
    shares = ''
    comments = ''
    for line in footer_lines:
        if '分享' in line:
            shares = line
        else:
            comments = line
    return shares or 0, comments or 0


driver = set_profile_path('C:/Users/ming/AppData/Local/Google/Chrome/User Data/Default')

club_urls = []
club_thumb = []  # currently unused
rows = []
col_name = ['社團名稱','社團按讚','貼文按讚','貼文內容','日期','分享數','留言數']
rows.append(col_name)
# Inline style of the "like" icon; currently unused.
thumb_icon_style='background-image: url("https://static.xx.fbcdn.net/rsrc.php/v3/yP/r/-yo1T7mJE5M.png"); background-position: 0px -618px; background-size: 26px 1376px; width: 20px; height: 20px; background-repeat: no-repeat; display: inline-block;'

try:
    # Step 1: collect the URL of every page listed in the search results.
    driver.get('https://www.facebook.com/search/pages/?q='+keyword)
    scroll_to_end(max_scroll_time)
    sleep(1)

    clubs_elem = driver.find_elements(By.XPATH,"//div[@class='j83agx80 l9j0dhe7 k4urcfbm']")
    for e in clubs_elem:
        links = e.find_elements(By.XPATH,".//span[@class='nc684nl6']/a")
        if not links:
            # Result card without a link: nothing to visit.
            continue
        href = links[0].get_attribute('href')
        if href:
            club_urls.append(href)

    # Step 2: visit each page and harvest its posts.
    for url in club_urls:
        driver.get(url)
        # for page loading
        sleep(5)
        try:
            club_name = driver.find_element(By.XPATH,"//div[@class='tr9rh885']//h2/span/span").text
            thumbs = driver.find_element(By.XPATH,"//div[@class='taijpn5t cbu4d94t j83agx80']//span/span").text
        except WebDriverException:
            # Page header not found (layout differs or page unavailable):
            # skip this page instead of aborting the run and losing the rows
            # collected so far.
            continue
        scroll_to_end(max_scroll_time)

        # Long posts are truncated and must be expanded first.
        showmores = driver.find_elements(By.XPATH,".//*[contains(text(), '顯示更多')]")
        for show_btn in showmores:
            try:
                show_btn.click()
            except WebDriverException:
                # Best effort: a button may be covered, stale or not
                # clickable; the post is then kept in its truncated form.
                pass

        post_elems = driver.find_elements(By.XPATH,"//div[@class='du4w35lb k4urcfbm l9j0dhe7 sjgh65i0']")
        for p_elem in post_elems:
            try:
                content = p_elem.find_element(By.XPATH,".//div[@class='ecm0bbzt hv4rvrfc ihqw7lf3 dati1w0a']").text.rstrip()
                post_date = p_elem.find_element(By.XPATH,".//span[@class='j1lvzwm4 stjgntxs ni8dbmo4 q9uorilb gpro0wi8']/b/b[not(@style='display: none;')]").text
                post_thumbs = p_elem.find_element(By.XPATH,".//span[@class='gpro0wi8 cwj9ozl2 bzsjyuwj ja2t1vim']").text
                footer_lines = p_elem.find_element(By.XPATH,".//div[@class='bp9cbjyn j83agx80 pfnyh3mw p1ueia1e']").text.splitlines()
            except WebDriverException:
                # Facebook streaming video posts have no content
                continue

            # Shares and comments are both stuffed into the same div.
            shares, comments = _split_counts(footer_lines)

            # Shared-page posts carry no post content of their own.
            if content != '':
                rows.append([club_name,thumbs,post_thumbs,content,post_date,shares,comments])
                print(post_date)
                print(content)
                print(shares)
                print(comments)
                print(post_thumbs)
                print('----------------------------------------------------------------')
finally:
    # Always release the browser and its chromedriver process.
    driver.quit()

# newline='' lets the csv module control line endings; without it every row
# is followed by a blank line on Windows.
with open('out.csv', 'w', encoding='UTF8', newline='') as f:
    csv.writer(f).writerows(rows)
|
|
|
+
|
|
|
+
|