# Celery worker module: crawls Google News results for a keyword and stores
# them in a MySQL table via the `dataset` library.
#
# NOTE(review): the original paste carried web-viewer artifacts (a file-size
# header, fused line numbers, and a stray "sss" token) that were syntax
# errors; they have been removed.
import datetime

import dataset
from celery import Celery
from GoogleNews import GoogleNews

# Redis instance used as the Celery message broker.
app = Celery('tasks', broker='redis://db.ptt.cx')
  7. @app.task
  8. def crawl_keyword_news(keyword):
  9. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/cmm_test?charset=utf8mb4')
  10. news_table = db['news_table']
  11. googlenews = GoogleNews(lang='zh-TW')
  12. #kw='裝潢'
  13. kw=keyword
  14. googlenews.set_lang('zh-TW')
  15. #googlenews.search('建材')
  16. googlenews.search(kw)
  17. rs=googlenews.results()
  18. #print(rs)
  19. for r in rs:
  20. print(r['title'])
  21. print(r['desc'])
  22. print(r['link'])
  23. print(r['datetime'])
  24. news_table.insert({'kw':kw,'dt':r['datetime'],'title':r['title'],'desc':r['desc'],'link':r['link'],'crawl_dt':datetime.datetime.now()})
  25. for i in range(2,6):
  26. rs = googlenews.page_at(i)
  27. for r in rs:
  28. print(r['title'])
  29. print(r['desc'])
  30. print(r['link'])
  31. print(r['datetime'])
  32. news_table.insert({'kw':kw,'dt':r['datetime'],'title':r['title'],'desc':r['desc'],'link':r['link'],'crawl_dt':datetime.datetime.now()})