123456789101112131415161718192021222324252627282930313233343536 |
# Scrape the text of one element from a Facebook (mbasic) page through an
# already-running Chrome instance, split it into tokens, and append the
# tokens to ``movies.csv`` (one token per line, each followed by ";").
import re
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# "debuggerAddress" makes ChromeDriver connect to the Chrome debugger server
# listening at this address instead of launching a new browser.
options = Options()
options.add_experimental_option("debuggerAddress", "127.0.0.1:9527")
driver = webdriver.Chrome(options=options)

url = 'https://mbasic.facebook.com/dttitri/'
driver.get(url)
# time.sleep(5)  # optional pause after navigation; left disabled

# Check the total number of posts.
# NOTE(review): postList is collected but not used further in this script.
# The class names 'cy fi fj' look auto-generated and are presumably brittle;
# confirm them against the live page.
htmltext = driver.page_source
soup = BeautifulSoup(htmltext, "lxml")
postList = soup.find_all('article', 'cy fi fj')

# Print the post.
# find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# removed in Selenium 4.3; this form also works on Selenium 3.
context = driver.find_element(By.XPATH, '//div[@class="fr"]/div/span')
# Read the text once: every ``.text`` access is a round-trip to the browser.
post_text = context.text
print(post_text)

# Split the post on single spaces and newlines.
values = re.split(' |\n', post_text)
print(values)

# Append every token to the CSV file as "<token>;" on its own line.
# The ``with`` block closes the file, so no explicit close() is needed.
with open('movies.csv', 'a', encoding="utf-8") as csv_file:
    csv_file.writelines(value + ";" + "\n" for value in values)
|