data_copy.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. from fastapi import APIRouter, Form, Depends, HTTPException, File, UploadFile,Request
  2. from fastapi.responses import FileResponse
  3. from fastapi_login.exceptions import InvalidCredentialsException
  4. from fastapi_login import LoginManager
  5. from fastapi.responses import HTMLResponse
  6. import re
  7. import pandas as pd
  8. import ast
  9. import pandas as pd
  10. import csv
  11. import matplotlib.pyplot as plt
  12. import matplotlib
  # Use a font family with CJK glyphs so non-ASCII text renders in charts.
  matplotlib.rcParams['font.family'] = 'Microsoft YaHei'
  # Never elide columns when a DataFrame is printed.
  pd.options.display.max_columns = None
  # Router that collects the endpoints defined in this module.
  data = APIRouter()
  def fix_dict_format(s):
      """Rebuild a loosely formatted ``'key':number`` listing as a dict literal.

      Every ``'key'``/integer pair found in *s* is re-emitted as ``'key': n``,
      joined with commas and wrapped in braces, which yields text in Python
      dict-literal syntax even when the input lacks separators or braces.

      Args:
          s: Raw value to normalise. Anything that is not a ``str`` (for
              example NaN from a missing cell) is returned unchanged.

      Returns:
          A string such as ``"{'a': 1, 'b': 0}"`` (``"{}"`` when no pair is
          found), or *s* itself when it is not a string.
      """
      if not isinstance(s, str):
          return s
      # Whitespace around the colon is optional: an entry that is already well
      # formed ("'a': 1") must be kept, not silently dropped from the result.
      pairs = re.findall(r"'([^']+)'\s*:\s*(\d+)", s)
      return "{" + ", ".join(f"'{key}': {value}" for key, value in pairs) + "}"
  def age_group(file):
      """Count students per age bracket from a user-information CSV.

      Reads *file*, keeps the rows whose ``position`` dict has ``'學員'`` equal
      to 1, derives an age from the ``birthday`` column and tallies the rows
      into fixed brackets. The filtered frame also gets ``age`` and
      ``age_group`` columns, but only the tally is returned.

      Args:
          file: Path or buffer accepted by ``pandas.read_csv``. The CSV must
              provide ``position`` and ``birthday`` columns; birthdays are
              parsed with the ``%Y-%m-%d`` format.

      Returns:
          A ``pandas.Series`` indexed by the bracket labels ``'18-'``,
          ``'18-24'``, ``'25-34'``, ``'35-44'``, ``'45-54'``, ``'55-64'`` and
          ``'65+'`` (in that order) holding the number of rows per bracket.
          Rows without a birthday are not counted in any bracket.
      """
      user_information = pd.read_csv(file)

      # Normalise the raw position text and parse it into a dict; missing cells
      # become {}. The parsed column is assigned back in one step instead of
      # calling fillna(inplace=True) on a column selection, which pandas may
      # apply to a temporary copy rather than to the frame.
      position_text = user_information['position'].apply(fix_dict_format)
      user_information['position'] = position_text.apply(
          lambda raw: ast.literal_eval(raw) if pd.notnull(raw) else {}
      )

      # Keep students only. copy() detaches the subset so the column
      # assignments below do not write into a slice of the full frame.
      is_student = user_information['position'].apply(
          lambda position: position.get('學員', 0) == 1
      )
      user_information = user_information[is_student].copy()

      birthday = pd.to_datetime(user_information['birthday'], format='%Y-%m-%d')
      current_date = pd.to_datetime('today')

      # Age in completed calendar years: year difference, minus one while this
      # year's birthday is still ahead. Dividing a day count by 365 and
      # rounding pushed people just short of a birthday into the next bracket.
      # A missing birthday (NaT) yields NaN here instead of raising.
      birthday_pending = (birthday.dt.month > current_date.month) | (
          (birthday.dt.month == current_date.month)
          & (birthday.dt.day > current_date.day)
      )
      user_information['age'] = (
          current_date.year - birthday.dt.year - birthday_pending.astype(int)
      )

      # Left-closed brackets: [0, 18), [18, 25), ... [65, inf).
      bins = [0, 18, 25, 35, 45, 55, 65, float('inf')]
      labels = ['18-', '18-24', '25-34', '35-44', '45-54', '55-64', '65+']
      user_information['age_group'] = pd.cut(
          user_information['age'], bins=bins, labels=labels, right=False
      )
      age_group_counts = user_information['age_group'].value_counts(sort=False)

      # Draw the bar chart. Nothing shows or saves it in this function, so the
      # figure is closed afterwards; otherwise every call (e.g. one per
      # request) would leave another figure registered with pyplot.
      fig, ax = plt.subplots(figsize=(5, 3))
      try:
          age_group_counts.plot(kind='bar', rot=0, ax=ax)
          ax.set_title('Age Distribution')
          ax.set_xlabel('Age Group')
          ax.set_ylabel('Count')
      finally:
          plt.close(fig)

      print(age_group_counts)
      return age_group_counts
  @data.post("/print")
  async def upload_file():
      """Handle POST /print: return the age-bracket counts as plain text.

      Runs ``age_group`` on the fixed server-side CSV and returns the resulting
      Series rendered with ``to_string()`` under the ``"data"`` key.
      """
      csv_path = '/var/www/ntcri_api/app/api/user_information_change.csv'
      counts = age_group(csv_path)
      rendered = counts.to_string()
      return {"data": rendered}
  if __name__ == '__main__':
      # Script entry point: run the analysis on the CSV in the working
      # directory and print the resulting counts.
      print(age_group('user_information_change.csv'))