12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- import dataset
- import os, sys
- import zipfile
- import pandas as pd
- from datetime import date
- from datetime import datetime, timedelta
- from jinja2 import Environment, FileSystemLoader
# Run date as "YYYY/MM/DD"; used for the output folder name (slashes removed)
# and passed to the template as the article date.
today = "{:%Y/%m/%d}".format(date.today())
# Run date with CJK year/month/day suffixes (e.g. "2024年01月31日"); compared
# against the crawler_date column when querying. The suffixes are inserted by
# str.format, so strftime itself only ever sees ASCII directives.
current = "{0:%Y}年{0:%m}月{0:%d}日".format(datetime.today())
- # zipfile example
def zip_dir(path):
    """Compress every file under *path* into ``<path>.zip``.

    The archive is written next to the directory (same name plus ``.zip``)
    using DEFLATE compression. Each file is stored under the path produced by
    joining the walked directory with the file name, i.e. entries keep the
    *path* prefix exactly as it was passed in.

    Args:
        path: Directory to archive. Also used as the archive's base name.
    """
    archive_name = '{}.zip'.format(path)
    # The context manager guarantees the central directory is flushed and the
    # file handle released even if a write fails part-way through.
    with zipfile.ZipFile(archive_name, 'w', zipfile.ZIP_DEFLATED) as zf:
        for root, _dirs, files in os.walk(path):
            for file_name in files:
                zf.write(os.path.join(root, file_name))
def data_read(keyword):
    """Fetch today's crawled articles for *keyword* as a DataFrame.

    Joins ``gnews.url_list2`` with ``gnews.gnews_detail2`` on ``url`` and keeps
    rows whose ``keyword`` equals *keyword* and whose ``crawler_date`` equals
    the module-level ``current`` date string.

    Args:
        keyword: Search keyword to filter on.

    Returns:
        A ``pandas.DataFrame`` with one row per matching record; empty (no
        columns) when nothing matches.
    """
    # NOTE(review): the connection URL embeds credentials in source control;
    # consider moving it to configuration or an environment variable.
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gnews?charset=utf8mb4')
    try:
        # Values are passed as bound parameters rather than formatted into the
        # SQL text, so a keyword containing quotes can neither break the query
        # nor inject SQL.
        result = db.query(
            "select * from gnews.url_list2 a join gnews.gnews_detail2 b "
            "on a.url = b.url and a.keyword = :keyword "
            "and b.crawler_date = :crawler_date",
            keyword=keyword,
            crawler_date=current,
        )
        url_pd = pd.DataFrame([dict(i) for i in result])
    finally:
        # Release the connection even when the query or conversion fails.
        db.close()
    return url_pd
def main():
    """Render today's articles for a keyword to Markdown and zip the result.

    Reads the keyword from ``sys.argv[1]``, loads up to 20 matching rows via
    ``data_read``, renders each through the ``gnews_md/template/gnews.md``
    Jinja2 template into ``gnews_md/<YYYYMMDD>/<title>.md``, then archives that
    folder with ``zip_dir``. Without a keyword argument a usage line is written
    to stderr and nothing else happens.
    """
    if len(sys.argv) < 2:
        print('usage: {} <keyword>'.format(sys.argv[0]), file=sys.stderr)
        return

    keyword = sys.argv[1]
    output_path = 'gnews_md/{}'.format(today.replace('/', ''))
    os.makedirs(output_path, exist_ok=True)
    print('starting :{}'.format(keyword))

    data = data_read(keyword).head(20)

    # The template does not depend on the row, so load it once up front
    # instead of rebuilding the environment for every article.
    env = Environment(loader=FileSystemLoader('gnews_md/template'))
    template = env.get_template('gnews.md')

    for _, row in data.iterrows():
        output = template.render(
            title=row['title'],
            date=today,
            keyword=[row['keyword']],
            content=row['detail_content'],
        )

        # A '/' in the title would be read as a directory separator and make
        # open() fail; '//' is collapsed first so such titles keep the same
        # file name as before.
        file_title = row['title'].replace('//', ' ').replace('/', ' ')
        # Explicit UTF-8 so non-ASCII titles/content are written the same way
        # regardless of the platform's default encoding.
        with open("{}/{}.md".format(output_path, file_title), "w", encoding="utf-8") as fh:
            fh.write(output)

    zip_dir(output_path)
# Script entry point: run the export only when executed directly, not on import.
if __name__ == "__main__":
    main()
|