from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import re
import csv

# Number of times to scroll the search-result page and each fan page, respectively.
club_rolls = 30
post_rolls = 600
def set_profile_path(profilepath):
    """Start Chrome with the given user profile so existing cookies and logins are reused."""
    option = webdriver.ChromeOptions()
    option.add_argument('--disable-web-security')
    option.add_argument('--allow-running-insecure-content')
    # Point Chrome at an existing local user-data directory (profile).
    option.add_argument("--user-data-dir=C:\\Users\\ming\\AppData\\Local\\Google\\Chrome\\User Data\\" + profilepath + "\\")
    driver = webdriver.Chrome(options=option)
    return driver
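# Usage sketch (assumptions: the named Chrome profile already exists locally and
# chromedriver is available on PATH); a secondary profile could be opened with:
#   driver = set_profile_path("Profile 1")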
def login():
    # Enter the email address.
    context = driver.find_element(By.NAME, "email")
    context.send_keys("ming013r@gmail.com")
    time.sleep(1.2)
    # Enter the password.
    context = driver.find_element(By.ID, 'pass')
    context.send_keys("Teof3045")
    time.sleep(1.1)
    # Submit the login form.
    commit = driver.find_element(By.NAME, 'login')
    commit.click()
    # Search for '科技' (technology) in the Facebook search box.
    context = driver.find_element(By.XPATH, "//label[input/@aria-label='搜尋 Facebook']")
    context.click()
    time.sleep(0.5)
    context.send_keys('科技')
    time.sleep(0.5)
    context.send_keys(Keys.RETURN)
    time.sleep(0.5)
    # Switch to the '粉絲專頁' (Pages) tab of the search results.
    context = driver.find_element(By.XPATH, "//*[contains(text(),'粉絲專頁')]")
    time.sleep(0.5)
    context.click()
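# Note: login() is defined but never called below; presumably the Chrome profile
# passed to set_profile_path() is already signed in to Facebook.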
def get_clubs():
    """Scroll the search-result page and collect pages with more than 100,000 likes."""
    html = driver.find_element(By.TAG_NAME, 'html')
    # Scroll to the bottom repeatedly so more results are loaded.
    for scrTimes in range(club_rolls):
        html.send_keys(Keys.END)
        time.sleep(0.3)
    # Drill into the feed container and take its direct children (one per result).
    context = (driver.find_element(By.XPATH, "//div[div/@role='feed']")
                     .find_element(By.CSS_SELECTOR, "div:first-child")
                     .find_element(By.XPATH, "./div")
                     .find_elements(By.XPATH, "./div"))
    club_list = []
    for c in context:
        try:
            link = c.find_element(By.CSS_SELECTOR, 'span.nc684nl6').find_element(By.CSS_SELECTOR, 'a.oajrlxb2')
            clubName = link.get_attribute('aria-label')
            clubUrl = link.get_attribute('href')
            # Normalise the like count, e.g. "12萬 人說這讚" -> "120000".
            likes = c.find_element(By.XPATH, ".//span[contains(text(), '說這')]")
            likes = (likes.get_attribute('innerHTML')
                          .replace(' ', '').replace('\u00a0', '').replace(',', '')
                          .replace('人說這讚', '').replace('萬', '0000'))
            # Keep only pages with more than 100,000 likes.
            if int(likes) > 100000:
                club_list.append([clubName, clubUrl, likes])
        except Exception:
            continue  # result card without the expected markup; skip it
    return club_list
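# The '萬' -> '0000' substitution above assumes a whole number such as "12萬"; a value
# like "1.2萬" would fail int() and that page would be skipped. A hedged alternative
# (illustrative only, not called anywhere in this script):
def parse_like_count(text):
    """Convert strings such as '3,456 人說這讚' or '1.2萬人說這讚' to an integer."""
    text = text.replace(' ', '').replace('\u00a0', '').replace(',', '').replace('人說這讚', '')
    if '萬' in text:
        return int(float(text.replace('萬', '')) * 10000)
    return int(text)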
driver = set_profile_path("Default")
# Open the page-search results for '科技' (URL-encoded as %E7%A7%91%E6%8A%80).
driver.get("https://www.facebook.com/search/pages/?q=%E7%A7%91%E6%8A%80")
print(driver.title)
time.sleep(0.6)
club_list = get_clubs()
with open('fbReport.csv', 'w', newline='', encoding='UTF-8') as csvfile:
    writer = csv.writer(csvfile)
    # Header: page name, page likes, post likes, post content, date, share count.
    writer.writerow(['社團名稱', '社團按讚', '貼文按讚', '貼文內容', '日期', '分享次數'])
    for club in club_list:
        clubName = club[0]
        url = club[1]
        driver.get(url)
        time.sleep(0.5)
        # Scroll the fan page to load older posts.
        html = driver.find_element(By.TAG_NAME, 'html')
        for scrTimes in range(post_rolls):
            html.send_keys(Keys.END)
            time.sleep(0.1)
        time.sleep(5)
        print(driver.title)
        # Locate the post containers by their class names.
        context = (driver.find_element(By.XPATH, "//div[@class='bp9cbjyn j83agx80 cbu4d94t d2edcug0']")
                         .find_element(By.XPATH, "//div[@class='dp1hu0rb d2edcug0 taijpn5t j83agx80 gs1a9yip']")
                         .find_elements(By.XPATH, "//div[@class='du4w35lb k4urcfbm l9j0dhe7 sjgh65i0']"))
        # Class strings used to match the post body and the post date link.
        postclass = 'd2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 fe6kdd0r mau55g9w c8b282yb d3f4x2em iv3no6db jq4qci2q a3bd9o3v b1v8xokw oo9gr5id hzawbc8m'
        dateclasses = 'oajrlxb2 g5ia77u1 qu0x051f esr5mh6w e9989ue4 r7d6kgcz rq0escxv nhd2j8a9 nc684nl6 p7hjln8o kvgmc6g5 cxmmr5t8 oygrvhab hcukyx3x jb3vyjys rz4wbd8a qt6c0cv9 a8nywdso i1ao9s8h esuyzwwr f1sip0of lzcic4wl gmql0nx0 gpro0wi8 b1v8xokw'
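        # These atomic class strings are generated by Facebook's front-end build and
        # appear to change over time; they are likely the first thing to update when
        # the selectors above stop matching.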
        for c in context:
            try:
                # Post date (from the timestamp link), body HTML, and like count.
                date = c.find_element(By.XPATH, ".//a[@class='" + dateclasses + "']").get_attribute('aria-label')
                content = c.find_element(By.XPATH, ".//span[@class='" + postclass + "']").get_attribute('innerHTML')
                likes = c.find_element(By.CSS_SELECTOR, 'span.pcp91wgn').get_attribute('innerHTML')
                # Share count, e.g. "35 次分享" -> "35"; default to 0 when absent.
                shares = 0
                try:
                    shares = c.find_element(By.XPATH, ".//span[contains(text(), '次分享')]").get_attribute('innerHTML')
                    shares = shares.replace('次分享', '').replace(' ', '')
                except Exception:
                    pass
                # Strip HTML tags and newlines from the post body.
                content = re.sub('<.*?>', '', content)
                content = content.replace('\n', '')
                print('clubName', clubName)
                print('likes1', club[2])
                print('content', content)
                print('date', date)
                print('shares', shares)
                if date is None:
                    date = ''
                writer.writerow([clubName, club[2], likes, content, date, str(shares)])
            except Exception as e:
                # Sponsored or otherwise non-standard posts lack these elements; log and skip.
                print(e)

driver.close()
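# The script above relies on fixed time.sleep() pauses. A hedged alternative is
# Selenium's explicit waits (sketch only, not wired into the script; the locator
# shown is the email field used by login()):
#
#   from selenium.webdriver.support.ui import WebDriverWait
#   from selenium.webdriver.support import expected_conditions as EC
#
#   email_field = WebDriverWait(driver, 10).until(
#       EC.presence_of_element_located((By.NAME, "email")))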