# Fetch a single web page, print the text that newspaper extracts from it,
# then print the keywords that chinese_keybert generates for that text.
from newspaper import Article
from chinese_keybert import Chinese_Extractor

# The extractor is constructed before any network access, so a construction
# failure surfaces before the page is requested.
kw_extractor = Chinese_Extractor()

# Page to analyse.  Alternative pages kept for reference -- assign one of
# them to ``url`` to analyse it instead:
#   https://www.momoshop.com.tw/category/MgrpCategory.jsp?m_code=1803900396&cateLevel=2
#   https://www.100.com.tw/article/3471
#   https://www.decorations.com.tw/
#   https://dctdesign.tw/taipei-house-design-top10/
#   https://tw.stock.yahoo.com/news/ccs-insight%E9%A0%90%E6%B8%ACaigc%E8%A2%AB%E9%81%8E%E5%BA%A6%E7%82%92%E4%BD%9C-%E6%98%8E%E5%B9%B4%E5%B0%87-%E9%99%8D%E6%BA%AB-003743296.html
#   https://www.flexclip.com/tw/create/artificial-intelligence-video.html
url = "http://www.fingermedia.tw/?tag=%E8%91%A3%E4%BA%8B%E9%95%B7%E9%99%B3%E7%99%BE%E6%AC%BD"

# Download the page and let newspaper parse it.
article = Article(url)
article.download()
article.parse()

# Show the extracted text so the keyword output can be checked against it.
article_text = article.text
print(article_text)

# generate_keywords is given a list of documents; here it holds only the one
# article.  NOTE(review): "mmr" presumably selects Maximal Marginal Relevance
# ranking, with ``diversity`` as its trade-off weight -- confirm against the
# chinese_keybert documentation.
documents = [article_text]
keywords = kw_extractor.generate_keywords(
    documents,
    top_k=50,
    rank_methods="mmr",
    diversity=0.6,
)
print(keywords)