gen_daily_report.py 3.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. #!/usr/bin/python3
  2. from pytrends.request import TrendReq
  3. import pytrends
  4. import sys
  5. from datetime import datetime, timezone, timedelta
  6. import traceback
  7. import requests
  8. from bs4 import BeautifulSoup
  9. import re
  10. #import farmlib
  11. import pandas as pd
  12. import random
  13. import os
  14. import dataset
  15. import json
  16. from gn3 import GoogleNews3
  17. googlenews = GoogleNews3(lang="zh-tw", period="7d",
  18. encode="utf-8", region="tw")
  19. # googlenews.search('蔡英文')
  20. """ result = googlenews.gettext()
  21. for n in range(len(result)):
  22. print(n)
  23. print(result[n])
  24. result = googlenews.get_links()
  25. for n in range(len(result)):
  26. print(n)
  27. print(result[n]) """
  28. """ googlenews.get_news('蔡英文')
  29. trdresult = googlenews.result()
  30. #print(trdresult[0]['title'])
  31. for article in trdresult:
  32. print(article['title']) """
  33. db = dataset.connect(
  34. 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/yodb?charset=utf8mb4')
  35. cursor = db.query(
  36. '(SELECT * FROM trending_searches ORDER BY ts_date DESC LIMIT 20) ORDER BY ts_date')
  37. """ for c in cursor:
  38. print(c['ts_word']) """
  39. tz = timezone(timedelta(hours=+8))
  40. #print(datetime.now(tz).isoformat(timespec="seconds"))
  41. # print(str(datetime.date.today()).replace('-',''))
  42. trpath = os.getcwd() + '/../webSite/content/trends/'
  43. trdpath = os.getcwd() + '/../webSite/content/trenddetail/'
  44. with open(trpath + str(datetime.today().date()).replace('-', '')+'.md', 'w', encoding='UTF-8') as f:
  45. f.write('+++\n')
  46. f.write('title = "趨勢日報"\n' +
  47. 'date = "' + str(datetime.now(tz).isoformat(timespec="seconds")) + '"\n' +
  48. 'tags = ["daily-trend-watch"]\n' +
  49. 'categories = ["trends"]\n' +
  50. 'layout = "trends"\n' +
  51. 'banner = "https://i.imgur.com/jdQb3ZH.jpg"\n')
  52. for c in cursor:
  53. f.write('no' + str(c['ts_rank']).zfill(2) +
  54. ' = "' + c['ts_word'] + '"\n')
  55. googlenews.clear()
  56. googlenews.get_news(c['ts_word'])
  57. trdresult = googlenews.result()
  58. with open(trdpath + str(datetime.today().date()).replace('-', '')+'_' + str(c['ts_rank']).zfill(2)+'.md', 'w', encoding='UTF-8') as fd:
  59. fd.write('+++\n')
  60. fd.write('title = "趨勢日報"\n' +
  61. 'date = "' + str(datetime.now(tz).isoformat(timespec="seconds")) + '"\n' +
  62. 'layout = "trends"\n' +
  63. 'banner = "https://i.imgur.com/jdQb3ZH.jpg"\n')
  64. rcount = 1
  65. print(c['ts_word'])
  66. for article in trdresult:
  67. fd.write('title' + str(rcount)+' = "' + str(article['title']).replace('"',"'") + '"\n' +
  68. 'desc' + str(rcount)+' = "' + str(article['desc']) + '"\n' +
  69. 'date' + str(rcount)+' = "' + str(article['date']) + '"\n' +
  70. 'datetime' + str(rcount)+' = "' + str(article['datetime']) + '"\n' +
  71. 'link' + str(rcount)+' = "' + str(article['link']) + '"\n' +
  72. 'img' + str(rcount)+' = "' + str(article['img']) + '"\n' +
  73. 'media' + str(rcount)+' = "' + str(article['media']) + '"\n' +
  74. 'site' + str(rcount)+' = "' + str(article['site']) + '"\n')
  75. # print(article)
  76. rcount += 1
  77. fd.write('+++\n')
  78. f.write('+++\n')