# parse_result.py
import codecs
import datetime
import json
import os
import random
import re
import sys
import traceback
import urllib

import gspread
import pandas as pd
import requests
from gspread_pandas import Spread, Client
from oauth2client.service_account import ServiceAccountCredentials


  16. def save_sheet(df,filename,tabname,startpos='A1'):
  17. scope = ['https://spreadsheets.google.com/feeds',
  18. 'https://www.googleapis.com/auth/drive']
  19. # credentials = ServiceAccountCredentials.from_json_keyfile_name('c:\\keys\\service\\gspread.json', scope)
  20. credentials = ServiceAccountCredentials.from_json_keyfile_name('c:\\keys\\spread2.json', scope)
  21. gc = gspread.authorize(credentials)
  22. spread = Spread(filename,creds=credentials)
  23. spread.df_to_sheet(df, index=False, sheet=tabname, start=startpos, replace=False)
  24. fr=codecs.open('c:/tmp/out.txt','r','utf-8')
  25. lines=fr.readlines()
  26. fr.close()
  27. cur_kw=None
  28. fulldict={}
  29. for l in lines:
  30. l=l.replace('\n','').replace('\u202a','').replace('\u202c','')
  31. if '(' in l:
  32. cur_kw=l
  33. else:
  34. elmts=l.split(',')
  35. lst=[]
  36. for elmt in elmts:
  37. lst.append(elmt)
  38. fulldict[cur_kw]=lst
  39. # fulldict
  40. # print(l)
  41. print(fulldict)
  42. def get_num_n(fulldict,n):
  43. result=[]
  44. for k,v in fulldict.items():
  45. if len(fulldict[k])<n+1:
  46. result.append(' ')
  47. else:
  48. result.append(fulldict[k][n])
  49. return result
  50. cols=[]
  51. maxlen=0
  52. for k,v in fulldict.items():
  53. elmts=k.split('(')
  54. k=elmts[0]
  55. cols.append(k)
  56. if len(v)>maxlen:
  57. maxlen=len(v)
  58. df = pd.DataFrame(columns=tuple(cols))
  59. llen=len(fulldict.items())
  60. for i in range(maxlen):
  61. df.loc[i]=get_num_n(fulldict,i)
  62. print(df)
  63. save_sheet(df,'keyword-details','風水')