# parse_hw.py
import codecs
import os
from email.utils import parseaddr

import dataset
  4. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  5. table=db['homework']
  6. lst=os.listdir(r'C:\data\homework')
  7. def proc_email(fname):
  8. fr=codecs.open(fname,'r','utf-8')
  9. lines=fr.readlines()
  10. fr.close()
  11. email=None
  12. url=None
  13. for l in lines:
  14. if 'From:' in l:
  15. elmts=l.split("<")
  16. eml=elmts[1].replace('>','')
  17. print(eml)
  18. email=eml
  19. if 'http' in l:
  20. print(l)
  21. url=l
  22. if email is not None:
  23. break
  24. table.insert({'fname':fname,'email':email,'url':url})
  25. for l in lst:
  26. fname=r'C:\data\homework'+"\\"+l
  27. if '.eml' in fname:
  28. print(fname)
  29. proc_email(fname)
  30. #print(lst)