|
@@ -0,0 +1,41 @@
|
|
|
+from newspaper import Article
|
|
|
+from GoogleNews import GoogleNews
|
|
|
+
|
|
|
+
|
|
|
def news(title_and_link):
    """Download each linked article and save its body text to disk.

    For every entry in *title_and_link*, the page at ``entry['link']`` is
    downloaded and parsed with ``Article`` and its text is written to
    ``/googlenews/<title>.txt``.  The file is opened in exclusive-create
    mode, so when a file for that title already exists a message is printed
    and the existing file is left untouched.

    Args:
        title_and_link: Iterable of mappings, each providing a ``'title'``
            and a ``'link'`` key.

    Returns:
        The text of the last article processed, or ``''`` when
        *title_and_link* is empty.
    """
    import os

    # A path separator inside a title would be read by open() as a directory
    # boundary; map separators to '_' so the title stays a single file name.
    separators = {ord(sep): '_' for sep in (os.sep, os.altsep) if sep}

    # Bind the result up front so an empty input returns '' instead of
    # failing on an unbound local at the final return.
    text = ''
    for entry in title_and_link:
        article = Article(entry['link'])
        article.download()
        article.parse()
        text = article.text
        file_name = entry['title'].translate(separators)
        target = f'/googlenews/{file_name}.txt'
        try:
            # 'x' refuses to overwrite an existing file; the encoding is
            # explicit so non-ASCII article text does not depend on the
            # platform default.
            with open(target, 'x', encoding='utf-8') as f:
                f.write(text)
        except FileExistsError:
            print('已有同樣新聞文章')
    return text
|
|
|
+
|
|
|
+
|
|
|
def google_news(keyword, page):
    """Collect titles and links from Google News results for *keyword*.

    Args:
        keyword: Search term passed to ``GoogleNews.search``.
        page: Number of result pages to fetch, counted from page 1.

    Returns:
        A list of ``{'title': ..., 'link': ...}`` dicts, one per result, in
        the order the pages returned them.  The list and its length are also
        printed.
    """
    g = GoogleNews()
    g.setlang('cn')
    g.setencode('utf-8')
    g.search(keyword)
    # Drop anything the search() call itself collected so the loop below is
    # the only source of results (presumably search() pre-loads page 1 --
    # confirm against the GoogleNews documentation).
    g.clear()
    news_link = []
    # Request pages 1..page in turn.  Passing `page` itself on every
    # iteration would fetch the same page repeatedly and duplicate results.
    for page_number in range(1, page + 1):
        g.get_page(page_number)
        news_link.extend(
            {'title': item['title'], 'link': item['link']}
            for item in g.result()
        )
        # Reset the accumulated results so the next page is not mixed with
        # entries that were already copied.
        g.clear()
    print(len(news_link))
    print(news_link)
    return news_link
|
|
|
+
|
|
|
+
|
|
|
# Script body: fetch one page of Google News results for the keyword below,
# then download and save every linked article.
result = google_news(keyword='地震', page=1)
news(title_and_link=result)
|