ming pred 3 rokmi
rodič
commit
f4e431c050
1 zmenený súbor, 37 pridaní a 0 odobraní
  1. 37 0
      main.py

+ 37 - 0
main.py

@@ -0,0 +1,37 @@
+from GoogleNews import GoogleNews
+import dataset
+import datetime
+from celery import Celery
+
+app = Celery('tasks', broker='redis://db.ptt.cx')  # Celery app named 'tasks'; Redis at db.ptt.cx is the broker
+
+@app.task
+def crawl_keyword_news(keyword):
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/cmm_test?charset=utf8mb4')
+    news_table = db['news_table']
+    googlenews = GoogleNews(lang='zh-TW')
+    #kw='裝潢'
+    kw=keyword
+    googlenews.set_lang('zh-TW')
+    #googlenews.search('建材')
+    googlenews.search(kw)
+
+
+    rs=googlenews.results()
+    #print(rs)
+    for r in rs:
+        print(r['title'])
+        print(r['desc'])
+        print(r['link'])
+        print(r['datetime'])
+        news_table.insert({'kw':kw,'dt':r['datetime'],'title':r['title'],'desc':r['desc'],'link':r['link'],'crawl_dt':datetime.datetime.now()})
+
+    for i in range(2,6):
+        rs = googlenews.page_at(i)
+        for r in rs:
+            print(r['title'])
+            print(r['desc'])
+            print(r['link'])
+            print(r['datetime'])
+            news_table.insert({'kw':kw,'dt':r['datetime'],'title':r['title'],'desc':r['desc'],'link':r['link'],'crawl_dt':datetime.datetime.now()})
+