fb.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. from selenium import webdriver
  2. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  3. import time
  4. from selenium.webdriver.common.by import By
  5. from selenium.webdriver.common.keys import Keys
  6. import re
  7. import csv
  # Module-level placeholder. set_profile_path() builds its own local options
  # object, so this value is not read by any code visible in this file; it is
  # kept only so the module-level name continues to exist.
  option = ''
  # Number of END key presses get_clubs() sends to scroll the search results.
  club_rolls = 30
  # Number of END key presses sent on each page's timeline before its posts
  # are collected.
  post_rolls = 600
  def set_profile_path(profilepath, *, user_data_root=None):
      """Start Chrome with a specific profile directory and return the driver.

      Args:
          profilepath: Name of the directory below ``user_data_root`` that is
              handed to Chrome (for example ``"Default"``).
          user_data_root: Directory that contains the profile directories.
              When omitted, the original hard-coded location under
              ``C:\\Users\\ming`` is used, so existing callers are unaffected.

      Returns:
          The ``webdriver.Chrome`` instance created with these options.
      """
      if user_data_root is None:
          user_data_root = "C:\\Users\\ming\\AppData\\Local\\Google\\Chrome\\User Data"

      chrome_options = webdriver.ChromeOptions()
      chrome_options.add_argument('--disable-web-security')
      chrome_options.add_argument('--allow-running-insecure-content')
      # NOTE(review): the profile directory itself is passed as --user-data-dir.
      # Chrome usually takes the "User Data" root there and selects the profile
      # with --profile-directory; confirm this is the intended behaviour.
      chrome_options.add_argument(
          "--user-data-dir=" + user_data_root + "\\" + profilepath + "\\"
      )
      return webdriver.Chrome(options=chrome_options)
  def login(email=None, password=None):
      """Log in to Facebook, search for '科技' and open the pages tab.

      Uses the module-level ``driver``, which must already show the login form.

      Args:
          email: Account e-mail. Falls back to the ``FB_EMAIL`` environment
              variable, then to the legacy value embedded in this file.
          password: Account password. Falls back to the ``FB_PASSWORD``
              environment variable, then to the legacy embedded value.
      """
      import os

      # SECURITY: the literals below are real-looking credentials committed to
      # source. They remain only so a bare login() call behaves as before;
      # rotate the password and remove them once callers supply credentials
      # through arguments or the environment.
      if email is None:
          email = os.environ.get("FB_EMAIL", "ming013r@gmail.com")
      if password is None:
          password = os.environ.get("FB_PASSWORD", "Teof3045")

      # Enter the e-mail address.
      email_box = driver.find_element(By.NAME, "email")
      email_box.send_keys(email)
      time.sleep(1.2)

      # Enter the password.
      password_box = driver.find_element(By.ID, 'pass')
      password_box.send_keys(password)
      time.sleep(1.1)

      # Submit the login form.
      driver.find_element(By.NAME, 'login').click()

      # NOTE(review): there is no wait between submitting the form and looking
      # up the search box; if the next page loads slowly this lookup raises.
      search_box = driver.find_element(
          By.XPATH, "//label[input/@aria-label ='搜尋 Facebook']"
      )
      search_box.click()
      time.sleep(0.5)
      search_box.send_keys('科技')
      time.sleep(0.5)
      search_box.send_keys(Keys.RETURN)
      time.sleep(0.5)

      # Switch the search results to the element labelled '粉絲專頁'.
      pages_tab = driver.find_element(By.XPATH, "//*[contains(text(),'粉絲專頁')]")
      time.sleep(0.5)
      pages_tab.click()
  def _parse_like_count(raw_text):
      """Convert a like label such as '10.5 萬人說這讚' or '123,456' to an int."""
      cleaned = raw_text
      # innerHTML may carry plain spaces, non-breaking spaces (as a character or
      # as the &nbsp; entity) and thousands separators around the number.
      for junk in ('&nbsp;', '\xa0', ' ', ',', '人說這讚'):
          cleaned = cleaned.replace(junk, '')
      if cleaned.endswith('萬'):
          # '萬' means x10,000. Multiplying (instead of appending '0000') keeps
          # fractional values such as '10.5萬' correct.
          return int(round(float(cleaned[:-1]) * 10000))
      return int(cleaned)


  def get_clubs(min_likes=100000):
      """Collect pages from the current search results that have enough likes.

      Scrolls the page shown by the module-level ``driver`` by sending END
      ``club_rolls`` times, then inspects every result card in the feed.

      Args:
          min_likes: A page is kept only when its like count is strictly
              greater than this value. Defaults to the original 100000.

      Returns:
          A list of ``[name, url, likes]`` lists, where ``likes`` is the like
          count as a decimal string.
      """
      page_root = driver.find_element(By.TAG_NAME, 'html')
      for _ in range(club_rolls):
          page_root.send_keys(Keys.END)
          time.sleep(0.3)

      feed_parent = driver.find_element(By.XPATH, "//div[div/@role='feed']")
      cards = (
          feed_parent.find_element(By.CSS_SELECTOR, "div:first-child")
          .find_element(By.XPATH, "./div")
          .find_elements(By.XPATH, "./div")
      )

      club_list = []
      for card in cards:
          try:
              link = card.find_element(By.CSS_SELECTOR, 'span.nc684nl6').find_element(
                  By.CSS_SELECTOR, 'a.oajrlxb2'
              )
              club_name = link.get_attribute('aria-label')
              club_url = link.get_attribute('href')
              like_label = card.find_element(
                  By.XPATH, ".//span[contains(text(), '說這')]"
              ).get_attribute('innerHTML')
              like_count = _parse_like_count(like_label)
          except Exception:
              # Best effort: a card without a name link or a readable like
              # count is skipped rather than aborting the whole scrape.
              continue
          if like_count > min_likes:
              club_list.append([club_name, club_url, str(like_count)])
      return club_list
  # Script body: open the Facebook page search for '科技', collect the pages
  # returned by get_clubs(), then visit each page, scroll its timeline and write
  # one CSV row per post to fbReport.csv.
  driver = set_profile_path("Default")
  try:
      driver.get("https://www.facebook.com/search/pages/?q=%E7%A7%91%E6%8A%80")
      print(driver.title)
      time.sleep(0.6)
      # Not used by any code visible in this file; kept as module-level names.
      path = 'output.txt'
      lines = []
      club_list = get_clubs()

      # Exact class-attribute values used to locate a post's text and its date
      # link. They are the same for every page, so they are built once.
      postclass = 'd2edcug0 hpfvmrgz qv66sw1b c1et5uql lr9zc1uh a8c37x1j keod5gw0 nxhoafnm aigsh9s9 fe6kdd0r mau55g9w c8b282yb d3f4x2em iv3no6db jq4qci2q a3bd9o3v b1v8xokw oo9gr5id hzawbc8m'
      dateclasses = 'oajrlxb2 g5ia77u1 qu0x051f esr5mh6w e9989ue4 r7d6kgcz rq0escxv nhd2j8a9 nc684nl6 p7hjln8o kvgmc6g5 cxmmr5t8 oygrvhab hcukyx3x jb3vyjys rz4wbd8a qt6c0cv9 a8nywdso i1ao9s8h esuyzwwr f1sip0of lzcic4wl gmql0nx0 gpro0wi8 b1v8xokw'
      tag_pattern = re.compile(r'<.*?>')

      with open('fbReport.csv', 'w', newline='', encoding='UTF-8') as csvfile:
          writer = csv.writer(csvfile)
          writer.writerow(['社團名稱', '社團按讚', '貼文按讚', '貼文內容', '日期', '分享次數'])
          for club in club_list:
              clubName = club[0]
              url = club[1]
              driver.get(url)
              time.sleep(0.5)

              # Scroll the timeline by sending END repeatedly, then wait before
              # reading the posts.
              html = driver.find_element(By.TAG_NAME, 'html')
              for _ in range(post_rolls):
                  html.send_keys(Keys.END)
                  time.sleep(0.1)
              time.sleep(5)
              print(driver.title)

              # NOTE(review): every XPath in this chain starts with '//', which
              # is evaluated from the document root rather than relative to the
              # previous element; the first two lookups therefore only require
              # that those containers exist somewhere on the page.
              context = (
                  driver.find_element(By.XPATH, "//div[@class='bp9cbjyn j83agx80 cbu4d94t d2edcug0']")
                  .find_element(By.XPATH, "//div[@class='dp1hu0rb d2edcug0 taijpn5t j83agx80 gs1a9yip']")
                  .find_elements(By.XPATH, "//div[@class='du4w35lb k4urcfbm l9j0dhe7 sjgh65i0']")
              )

              for c in context:
                  try:
                      date = c.find_element(By.XPATH, ".//a[@class='" + dateclasses + "']").get_attribute('aria-label')
                      content = c.find_element(By.XPATH, ".//span[@class='" + postclass + "']").get_attribute('innerHTML')
                      likes = c.find_element(By.CSS_SELECTOR, 'span.pcp91wgn').get_attribute('innerHTML')

                      # A post without a share counter keeps the default of 0.
                      shares = 0
                      try:
                          share_text = c.find_element(By.XPATH, ".//span[contains(text(), '次分享')]").get_attribute('innerHTML')
                          shares = share_text.replace('次分享', '').replace(' ', '')
                      except Exception:
                          pass

                      # Visual marker in the console for multi-line post bodies.
                      if '\n' in content:
                          for _ in range(3):
                              print('##############################################################')

                      # Strip HTML tags, then flatten the text onto one line.
                      content = tag_pattern.sub('', content)
                      content = content.replace('\n', '')

                      print('clubName', clubName)
                      print('likes1', club[2])
                      print('content', content)
                      print('date', date)
                      print('shares', shares)

                      if date is None:
                          date = ''
                      writer.writerow([clubName, club[2], likes, content, date, str(shares)])
                  except Exception as e:
                      # Best effort: report the problem and continue with the
                      # next post element.
                      print(e)
  finally:
      # quit() ends the WebDriver session and closes its windows, so the
      # browser is released even when scraping fails part-way through.
      driver.quit()