# main.py

import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
  6. options = Options()
  7. options.add_experimental_option("debuggerAddress", "127.0.0.1:9527")
  8. driver = webdriver.Chrome(options=options)
  9. url = 'https://mbasic.facebook.com/dttitri/'
  10. driver.get(url)
  11. # time.sleep(5)
  12. # 確認總篇數
  13. htmltext = driver.page_source
  14. soup = BeautifulSoup(htmltext, "lxml")
  15. postList = soup.find_all('article', 'cy fi fj')
  16. #印出貼文
  17. context = driver.find_element_by_xpath('//div[@class="fr"]/div/span')
  18. print(context)
  19. i = 0
  20. for string in context.text.split( ' ' ):
  21. if '\n' in string:
  22. break
  23. else:
  24. i = i + 1
  25. values = re.split(' |\n', context.text)
  26. print(values)
  27. # create new list
  28. with open('movies.csv', 'a',encoding="utf-8") as file:
  29. for i in range(len(values)):
  30. file.write(values[i]+";"+"\n")
  31. file.close()