from GoogleNews import GoogleNews
import dataset
import datetime

from celery import Celery

# Celery application; tasks are queued through the Redis broker below.
# NOTE(review): broker URL is hard-coded — consider moving it to config/env.
app = Celery('tasks', broker='redis://db.ptt.cx')
@app.task
def crawl_keyword_news(keyword):
    """Crawl Google News (zh-TW) for *keyword* and store the results.

    Fetches the first page of search results plus pages 2-5, prints each
    article's title/description/link/timestamp, and inserts one row per
    article into the ``news_table`` table, stamped with the crawl time.

    Parameters
    ----------
    keyword : str
        The search term to query Google News with.
    """
    # NOTE(review): credentials are hard-coded in the DSN — move them to
    # environment variables or a config file.
    db = dataset.connect(
        'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/cmm_test?charset=utf8mb4')
    news_table = db['news_table']

    googlenews = GoogleNews(lang='zh-TW')
    kw = keyword
    googlenews.set_lang('zh-TW')
    googlenews.search(kw)

    # First page of results, then pages 2-5 (same handling for each page).
    _store_results(news_table, kw, googlenews.results())
    for page in range(2, 6):
        _store_results(news_table, kw, googlenews.page_at(page))


def _store_results(news_table, kw, results):
    """Print each search result and insert it into *news_table*."""
    for r in results:
        print(r['title'])
        print(r['desc'])
        print(r['link'])
        print(r['datetime'])
        news_table.insert({
            'kw': kw,
            'dt': r['datetime'],
            'title': r['title'],
            'desc': r['desc'],
            'link': r['link'],
            # Naive local timestamp, matching the original behaviour;
            # NOTE(review): a timezone-aware UTC stamp would be safer.
            'crawl_dt': datetime.datetime.now(),
        })