from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import re
import csv

# Number of times to scroll the search-result page and each fan page, respectively.
club_rolls = 30
post_rolls = 600
def set_profile_path(profilepath):
    """Start Chrome with the given user profile so existing cookies and logins are reused."""
    option = webdriver.ChromeOptions()
    option.add_argument('--disable-web-security')
    option.add_argument('--allow-running-insecure-content')
    # Point Chrome at an existing local user-data directory (profile).
    option.add_argument("--user-data-dir=C:\\Users\\ming\\AppData\\Local\\Google\\Chrome\\User Data\\" + profilepath + "\\")
    driver = webdriver.Chrome(options=option)
    return driver
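# Usage sketch (assumptions: the named Chrome profile already exists locally and
# chromedriver is available on PATH); a secondary profile could be opened with:
#   driver = set_profile_path("Profile 1")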
def login():
    # Enter the email address.
    context = driver.find_element(By.NAME, "email")
    context.send_keys("ming013r@gmail.com")
    time.sleep(1.2)
    # Enter the password.
    context = driver.find_element(By.ID, 'pass')
    context.send_keys("Teof3045")
    time.sleep(1.1)
    # Submit the login form.
    commit = driver.find_element(By.NAME, 'login')
    commit.click()
    # Search for '科技' (technology) in the Facebook search box.
    context = driver.find_element(By.XPATH, "//label[input/@aria-label='搜尋 Facebook']")
    context.click()
    time.sleep(0.5)
    context.send_keys('科技')
    time.sleep(0.5)
    context.send_keys(Keys.RETURN)
    time.sleep(0.5)
    # Switch to the '粉絲專頁' (Pages) tab of the search results.
    context = driver.find_element(By.XPATH, "//*[contains(text(),'粉絲專頁')]")
    time.sleep(0.5)
    context.click()
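# Note: login() is defined but never called below; presumably the Chrome profile
# passed to set_profile_path() is already signed in to Facebook.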
def get_clubs():
    """Scroll the search-result page and collect pages with more than 100,000 likes."""
    html = driver.find_element(By.TAG_NAME, 'html')
    # Scroll to the bottom repeatedly so more results are loaded.
    for scrTimes in range(club_rolls):
        html.send_keys(Keys.END)
        time.sleep(0.3)
    # Drill into the feed container and take its direct children (one per result).
    context = (driver.find_element(By.XPATH, "//div[div/@role='feed']")
                     .find_element(By.CSS_SELECTOR, "div:first-child")
                     .find_element(By.XPATH, "./div")
                     .find_elements(By.XPATH, "./div"))
    club_list = []
    for c in context:
        try:
            link = c.find_element(By.CSS_SELECTOR, 'span.nc684nl6').find_element(By.CSS_SELECTOR, 'a.oajrlxb2')
            clubName = link.get_attribute('aria-label')
            clubUrl = link.get_attribute('href')
            # Normalise the like count, e.g. "12萬 人說這讚" -> "120000".
            likes = c.find_element(By.XPATH, ".//span[contains(text(), '說這')]")
            likes = (likes.get_attribute('innerHTML')
                          .replace(' ', '').replace('\u00a0', '').replace(',', '')
                          .replace('人說這讚', '').replace('萬', '0000'))
            # Keep only pages with more than 100,000 likes.
            if int(likes) > 100000:
                club_list.append([clubName, clubUrl, likes])
        except Exception:
            continue  # result card without the expected markup; skip it
    return club_list
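# The '萬' -> '0000' substitution above assumes a whole number such as "12萬"; a value
# like "1.2萬" would fail int() and that page would be skipped. A hedged alternative
# (illustrative only, not called anywhere in this script):
def parse_like_count(text):
    """Convert strings such as '3,456 人說這讚' or '1.2萬人說這讚' to an integer."""
    text = text.replace(' ', '').replace('\u00a0', '').replace(',', '').replace('人說這讚', '')
    if '萬' in text:
        return int(float(text.replace('萬', '')) * 10000)
    return int(text)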
driver = set_profile_path("Default")
# Open the page-search results for '科技' (URL-encoded as %E7%A7%91%E6%8A%80).
driver.get("https://www.facebook.com/search/pages/?q=%E7%A7%91%E6%8A%80")
print(driver.title)
time.sleep(0.6)
club_list = get_clubs()
with open('fbReport.csv', 'w', newline='', encoding='UTF-8') as csvfile:
    writer = csv.writer(csvfile)
    # Header: page name, page likes, post likes, post content, date, share count.
    writer.writerow(['社團名稱', '社團按讚', '貼文按讚', '貼文內容', '日期', '分享次數'])
    for club in club_list:
        clubName = club[0]
        url = club[1]
        driver.get(url)
        time.sleep(0.5)
        # Scroll the fan page to load older posts.
        html = driver.find_element(By.TAG_NAME, 'html')
        for scrTimes in range(post_rolls):
            html.send_keys(Keys.END)
            time.sleep(0.1)
        time.sleep(5)
        print(driver.title)
        # Locate the post containers by their class names.
        context = (driver.find_element(By.XPATH, "//div[@class='bp9cbjyn j83agx80 cbu4d94t d2edcug0']")
                         .find_element(By.XPATH, "//div[@class='dp1hu0rb d2edcug0 taijpn5t j83agx80 gs1a9yip']")
                         .find_elements(By.XPATH, "//div[@class='du4w35lb k4urcfbm l9j0dhe7 sjgh65i0']"))
        # Class strings used to match the post body and the post date link.
        postclass = 'd2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 fe6kdd0r mau55g9w c8b282yb d3f4x2em iv3no6db jq4qci2q a3bd9o3v b1v8xokw oo9gr5id hzawbc8m'
        dateclasses = 'oajrlxb2 g5ia77u1 qu0x051f esr5mh6w e9989ue4 r7d6kgcz rq0escxv nhd2j8a9 nc684nl6 p7hjln8o kvgmc6g5 cxmmr5t8 oygrvhab hcukyx3x jb3vyjys rz4wbd8a qt6c0cv9 a8nywdso i1ao9s8h esuyzwwr f1sip0of lzcic4wl gmql0nx0 gpro0wi8 b1v8xokw'
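        # These atomic class strings are generated by Facebook's front-end build and
        # appear to change over time; they are likely the first thing to update when
        # the selectors above stop matching.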
        for c in context:
            try:
                # Post date (from the timestamp link), body HTML, and like count.
                date = c.find_element(By.XPATH, ".//a[@class='" + dateclasses + "']").get_attribute('aria-label')
                content = c.find_element(By.XPATH, ".//span[@class='" + postclass + "']").get_attribute('innerHTML')
                likes = c.find_element(By.CSS_SELECTOR, 'span.pcp91wgn').get_attribute('innerHTML')
                # Share count, e.g. "35 次分享" -> "35"; default to 0 when absent.
                shares = 0
                try:
                    shares = c.find_element(By.XPATH, ".//span[contains(text(), '次分享')]").get_attribute('innerHTML')
                    shares = shares.replace('次分享', '').replace(' ', '')
                except Exception:
                    pass
                # Strip HTML tags and newlines from the post body.
                content = re.sub('<.*?>', '', content)
                content = content.replace('\n', '')
                print('clubName', clubName)
                print('likes1', club[2])
                print('content', content)
                print('date', date)
                print('shares', shares)
                if date is None:
                    date = ''
                writer.writerow([clubName, club[2], likes, content, date, str(shares)])
            except Exception as e:
                # Sponsored or otherwise non-standard posts lack these elements; log and skip.
                print(e)

driver.close()
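# The script above relies on fixed time.sleep() pauses. A hedged alternative is
# Selenium's explicit waits (sketch only, not wired into the script; the locator
# shown is the email field used by login()):
#
#   from selenium.webdriver.support.ui import WebDriverWait
#   from selenium.webdriver.support import expected_conditions as EC
#
#   email_field = WebDriverWait(driver, 10).until(
#       EC.presence_of_element_located((By.NAME, "email")))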