"""Render today's gnews database rows for one keyword to Markdown files and zip them.

Usage: pass the keyword as the first command-line argument.
"""
import os
import sys
import zipfile
from datetime import date
from datetime import datetime, timedelta

import dataset
import pandas as pd
from jinja2 import Environment, FileSystemLoader

# Today's date as "YYYY/MM/DD"; used in the rendered output and (with the
# slashes removed) as the output directory name.
today = date.today().strftime("%Y/%m/%d")
# Today's date with 年/月/日 suffixes; compared against `crawler_date` in the query.
current = datetime.today().strftime("%Y{y}%m{m}%d{d}").format(y='年', m='月', d='日')


def zip_dir(path):
    """Write every file found under *path* into the archive ``<path>.zip``.

    Entries are stored under the same relative path that ``os.walk`` yields
    (i.e. including *path* itself as a prefix).
    """
    # The context manager guarantees the archive is finalised and closed even
    # if one of the writes fails.
    with zipfile.ZipFile('{}.zip'.format(path), 'w', zipfile.ZIP_DEFLATED) as zf:
        for root, _dirs, files in os.walk(path):
            for file_name in files:
                zf.write(os.path.join(root, file_name))


def data_read(keyword):
    """Return the rows crawled today for *keyword* as a ``pandas.DataFrame``.

    Joins ``gnews.url_list2`` with ``gnews.gnews_detail2`` on ``url`` and keeps
    the rows whose ``keyword`` equals *keyword* and whose ``crawler_date``
    equals the module-level ``current`` string.
    """
    # NOTE(review): credentials are hard-coded in the connection URL; consider
    # moving them to configuration.
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gnews?charset=utf8mb4')
    try:
        # Bind the values instead of formatting them into the SQL text so a
        # keyword containing quotes cannot break or alter the statement.
        result = db.query(
            "select * from gnews.url_list2 a join gnews.gnews_detail2 b "
            "on a.url = b.url and a.keyword = :keyword "
            "and b.crawler_date = :crawler_date",
            keyword=keyword,
            crawler_date=current,
        )
        # Materialise the rows before the connection is closed.
        return pd.DataFrame([dict(i) for i in result])
    finally:
        db.close()


def _safe_file_name(title):
    """Return *title* with path separators replaced so it names a single file."""
    # '//' is collapsed first so titles that were already handled keep the
    # same file name; the second pass covers a lone '/', which would otherwise
    # be interpreted as a directory boundary by open().
    return title.replace('//', ' ').replace('/', ' ')


def main():
    """Render up to 20 of today's rows for the given keyword and zip the result.

    The keyword is read from ``sys.argv[1]``. One Markdown file per row is
    written to ``gnews_md/<YYYYMMDD>/`` using the ``gnews.md`` template from
    ``gnews_md/template``, then the directory is archived with ``zip_dir``.
    """
    if len(sys.argv) <= 1:
        # Without this guard `keyword` would be unbound further down.
        sys.exit('usage: {} <keyword>'.format(sys.argv[0]))
    keyword = sys.argv[1]

    output_path = 'gnews_md/{}'.format(today.replace('/', ''))
    os.makedirs(output_path, exist_ok=True)

    print('starting :{}'.format(keyword))
    data = data_read(keyword)
    data = data.head(20)

    # The template does not depend on the row, so load it once.
    env = Environment(loader=FileSystemLoader('gnews_md/template'))
    template = env.get_template('gnews.md')

    for _, row in data.iterrows():
        output = template.render(
            title=row['title'],
            date=today,
            keyword=[row['keyword']],
            content=row['detail_content'],
        )
        file_path = "{}/{}.md".format(output_path, _safe_file_name(row['title']))
        # Explicit encoding: do not depend on the platform's locale default.
        with open(file_path, "w", encoding="utf-8") as fh:
            fh.write(output)

    zip_dir(output_path)


if __name__ == "__main__":
    main()