|
@@ -0,0 +1,37 @@
|
|
|
|
+from GoogleNews import GoogleNews
|
|
|
|
+import dataset
|
|
|
|
+import datetime
|
|
|
|
+from celery import Celery
|
|
|
|
+
|
|
|
|
+app = Celery('tasks', broker='redis://db.ptt.cx')  # Celery application named 'tasks'; uses the Redis instance at db.ptt.cx as its message broker
|
|
|
|
+
|
|
|
|
def _store_news_rows(news_table, kw, results):
    """Print each news result and insert it into ``news_table``.

    Args:
        news_table: dataset table object the rows are inserted into.
        kw: Search keyword stored alongside every row.
        results: Iterable of result dicts providing the keys ``title``,
            ``desc``, ``link`` and ``datetime``. A falsy value (``None``
            or an empty list) is treated as "no results".
    """
    for r in results or ():
        print(r['title'])
        print(r['desc'])
        print(r['link'])
        print(r['datetime'])
        news_table.insert({
            'kw': kw,
            'dt': r['datetime'],
            'title': r['title'],
            'desc': r['desc'],
            'link': r['link'],
            # Time the row was written, from the worker's local clock.
            'crawl_dt': datetime.datetime.now(),
        })


@app.task
def crawl_keyword_news(keyword):
    """Search Google News for ``keyword`` and store the hits in ``news_table``.

    Runs a zh-TW search, stores the results returned for the initial search,
    then fetches result pages 2 through 5 and stores those as well. Every
    stored row is also printed to stdout.

    Args:
        keyword: Search term passed to Google News and saved in the ``kw``
            column of every inserted row.

    Returns:
        None.
    """
    # NOTE(review): the connection URL embeds database credentials in source;
    # consider loading it from configuration or the environment instead.
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/cmm_test?charset=utf8mb4')
    try:
        news_table = db['news_table']

        googlenews = GoogleNews(lang='zh-TW')
        googlenews.set_lang('zh-TW')
        googlenews.search(keyword)

        # Results gathered by the initial search call.
        _store_news_rows(news_table, keyword, googlenews.results())

        # Follow-up result pages 2..5.
        for page in range(2, 6):
            _store_news_rows(news_table, keyword, googlenews.page_at(page))
    finally:
        # Release the database connection even when crawling or inserting
        # fails, so repeated task runs do not accumulate open connections.
        db.close()
|
|
|
|
+
|