# gnew_md.py
# Export today's gnews rows for a keyword as Markdown files (rendered through
# a Jinja2 template) and zip the output directory.
import os
import sys
import zipfile
from datetime import date
from datetime import datetime, timedelta

import dataset
import pandas as pd
from jinja2 import Environment, FileSystemLoader
  8. today = date.today().strftime("%Y/%m/%d")
  9. current = datetime.today().strftime("%Y{y}%m{m}%d{d}").format(y='年', m='月', d='日')
  10. # zipfile example
  11. def zip_dir(path):
  12. zf = zipfile.ZipFile('{}.zip'.format(path), 'w', zipfile.ZIP_DEFLATED)
  13. for root, dirs, files in os.walk(path):
  14. for file_name in files:
  15. zf.write(os.path.join(root, file_name))
  16. def data_read(keyword):
  17. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gnews?charset=utf8mb4')
  18. result = db.query("select * from gnews.{} a join gnews.{} b on a.url = b.url and a.keyword='{}' and b.crawler_date='{}'".format('url_list2', 'gnews_detail2', keyword, current))
  19. url_pd = pd.DataFrame([dict(i) for i in result])
  20. db.close()
  21. return url_pd
  22. def main():
  23. if len(sys.argv) > 1 :
  24. keyword = sys.argv[1]
  25. output_path = 'gnews_md/{}'.format(today.replace('/',''))
  26. if not os.path.exists(output_path):
  27. os.makedirs(output_path)
  28. print('starting :{}'.format(keyword))
  29. data = data_read(keyword)
  30. data = data.head(20)
  31. for key, row in data.iterrows():
  32. file_loader = FileSystemLoader('gnews_md/template')
  33. env = Environment(loader=file_loader)
  34. template = env.get_template('gnews.md')
  35. output = template.render( title = row['title'], date = today,
  36. keyword = [row['keyword']],
  37. content = row['detail_content']
  38. )
  39. with open("{}/{}.md".format(output_path, row['title'].replace('//',' ')), "w") as fh:
  40. fh.write(output)
  41. zip_dir(output_path)
  42. if __name__ == "__main__":
  43. main()