main.py 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637
from GoogleNews import GoogleNews  # third-party Google News scraping client
import dataset                     # third-party DB abstraction (SQLAlchemy-based)
import datetime
from celery import Celery

# Celery application; tasks defined below are dispatched through this
# Redis broker.
# NOTE(review): broker host is hard-coded — consider reading it from an
# environment variable / config so deployments don't require a code change.
app = Celery('tasks', broker='redis://db.ptt.cx')
  6. @app.task
  7. def crawl_keyword_news(keyword):
  8. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/cmm_test?charset=utf8mb4')
  9. news_table = db['news_table']
  10. googlenews = GoogleNews(lang='zh-TW')
  11. #kw='裝潢'
  12. kw=keyword
  13. googlenews.set_lang('zh-TW')
  14. #googlenews.search('建材')
  15. googlenews.search(kw)
  16. rs=googlenews.results()
  17. #print(rs)
  18. for r in rs:
  19. print(r['title'])
  20. print(r['desc'])
  21. print(r['link'])
  22. print(r['datetime'])
  23. news_table.insert({'kw':kw,'dt':r['datetime'],'title':r['title'],'desc':r['desc'],'link':r['link'],'crawl_dt':datetime.datetime.now()})
  24. for i in range(2,6):
  25. rs = googlenews.page_at(i)
  26. for r in rs:
  27. print(r['title'])
  28. print(r['desc'])
  29. print(r['link'])
  30. print(r['datetime'])
  31. news_table.insert({'kw':kw,'dt':r['datetime'],'title':r['title'],'desc':r['desc'],'link':r['link'],'crawl_dt':datetime.datetime.now()})