# extract_content.py

  1. from newspaper import Article
  2. from chinese_keybert import Chinese_Extractor
  3. kw_extractor = Chinese_Extractor()
  4. url='https://www.decorations.com.tw/'
  5. #url = 'https://www.decorations.com.tw/'
  6. #url='https://dctdesign.tw/taipei-house-design-top10/'
  7. #url='https://tw.stock.yahoo.com/news/ccs-insight%E9%A0%90%E6%B8%ACaigc%E8%A2%AB%E9%81%8E%E5%BA%A6%E7%82%92%E4%BD%9C-%E6%98%8E%E5%B9%B4%E5%B0%87-%E9%99%8D%E6%BA%AB-003743296.html'
  8. #url='https://www.flexclip.com/tw/create/artificial-intelligence-video.html'
  9. article = Article(url)
  10. article.download()
  11. article.parse()
  12. txt=article.text
  13. print(txt)
  14. text=[txt]
  15. result = kw_extractor.generate_keywords(text,top_k=40,rank_methods="mmr",diversity=0.6)
  16. print(result)