ga_exp_path.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. #!/usr/bin/python3
  2. import sys
  3. import codecs
  4. import traceback
  5. import requests
  6. import re
  7. import pandas as pd
  8. import random
  9. import urllib
  10. import dataset
  11. import json
  12. import gspread
  13. import datetime
  14. from gspread_pandas import Spread, Client
  15. from oauth2client.service_account import ServiceAccountCredentials
  16. import os
  17. import threading
  18. from apiclient.discovery import build
  19. from oauth2client.service_account import ServiceAccountCredentials
  20. import dataset
  # Module-level setup: database handle/transaction and Google Analytics settings.
  #
  # NOTE(review): the connection URL embeds the database user and password in
  # source; consider loading it from the environment or a secrets store.
  db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')

  # Open the transaction *before* wiping the table so that the delete and the
  # inserts done in main() are committed together by main()'s db.commit().
  # Previously the delete ran before db.begin(), so a run that failed after
  # this point (e.g. a failing GA request) left ga_designer_path empty.
  # NOTE(review): atomicity assumes the table uses a transactional storage
  # engine -- confirm.
  db.begin()
  db.query('delete from ga_designer_path')
  table = db['ga_designer_path']

  # OAuth scope requested for the service account (read-only Analytics access).
  SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
  # Path of the service-account JSON key file used to build credentials.
  KEY_FILE_LOCATION = 'c:\\keys\\choozmo-ga-beee24b7a4c1.json'
  # View ID sent as 'viewId' in every report request.
  VIEW_ID = '188916214'
  def initialize_analyticsreporting():
      """Create an authorized Analytics Reporting API V4 service object.

      Credentials are read from the service-account key file at
      ``KEY_FILE_LOCATION`` and requested for the scopes in ``SCOPES``.

      Returns:
          The service object produced by
          ``build('analyticsreporting', 'v4', credentials=...)``.
      """
      service_credentials = ServiceAccountCredentials.from_json_keyfile_name(
          KEY_FILE_LOCATION,
          SCOPES,
      )
      return build('analyticsreporting', 'v4', credentials=service_credentials)
  def get_report(analytics, body):
      """Run a ``reports().batchGet`` call against the Reporting API V4.

      Args:
          analytics: An authorized Analytics Reporting API V4 service object.
          body: The list of report requests; it is sent as the value of the
              ``'reportRequests'`` key of the request body.

      Returns:
          Whatever ``execute()`` returns for the batch request (the API
          response).
      """
      request_payload = {'reportRequests': body}
      batch_request = analytics.reports().batchGet(body=request_payload)
      return batch_request.execute()
  def print_response(response):
      """Flatten an Analytics Reporting API V4 response into row dictionaries.

      Despite its name (kept so existing callers keep working) this function
      prints nothing; it only builds and returns the parsed rows.

      Args:
          response: An Analytics Reporting API V4 response (a dict with an
              optional ``'reports'`` list).

      Returns:
          A list with one dict per data row, over all reports. Each dict maps
          every dimension header to the row's dimension value and every metric
          name to the row's metric value; values are passed through unchanged.
          When a row carries several date ranges, later ranges overwrite the
          metric values of earlier ones because the keys are identical.
      """
      result = []
      for report in response.get('reports', []):
          column_header = report.get('columnHeader', {})
          dimension_headers = column_header.get('dimensions', [])
          metric_headers = column_header.get('metricHeader', {}).get('metricHeaderEntries', [])

          for row in report.get('data', {}).get('rows', []):
              ga_dict = dict(zip(dimension_headers, row.get('dimensions', [])))
              for date_range_values in row.get('metrics', []):
                  # A metrics entry without 'values' used to make zip() raise
                  # TypeError (zip over None); treat it as "no metric values".
                  metric_values = date_range_values.get('values') or []
                  for metric_header, value in zip(metric_headers, metric_values):
                      ga_dict[metric_header.get('name')] = value
              result.append(ga_dict)
      return result
  def _landing_prefix(path):
      """Return the first one or two '/'-separated segments of *path* as '/a/' or '/a/b/'."""
      parts = path.split('/')
      if len(parts) > 2:
          return '/' + parts[1] + '/' + parts[2] + '/'
      if len(parts) == 2:
          return '/' + parts[1] + '/'
      # No '/' in the value at all (presumably GA placeholders such as
      # '(not set)'): the old code raised IndexError here and aborted the whole
      # run. Keep the raw value instead.
      return path


  def main(start_date='2021-07-01', end_date='2021-07-16'):
      """Load landing-page -> second-page statistics from GA into the database.

      Requests users, new users, sessions, pageviews, bounce rate and
      pageviews-per-session by (landing page path, second page path) for rows
      whose second page path matches the regular expression
      ``/designers/cases/``, adds an ``'orig'`` key derived from the landing
      page path to every row, inserts the rows into ``table`` and commits the
      open transaction on ``db``.

      Args:
          start_date: First day of the report range, ``YYYY-MM-DD``.
          end_date: Last day of the report range, ``YYYY-MM-DD``.
      """
      analytics = initialize_analyticsreporting()
      body = [{
          'viewId': VIEW_ID,
          'dateRanges': [{'startDate': start_date, 'endDate': end_date}],
          'metrics': [
              {'expression': 'ga:users'},
              {'expression': 'ga:newusers'},
              {'expression': 'ga:sessions'},
              {'expression': 'ga:pageviews'},
              {'expression': 'ga:bounceRate'},
              {'expression': 'ga:pageviewsPerSession'},
          ],
          'dimensions': [
              {'name': 'ga:landingPagePath'},
              {'name': 'ga:secondPagePath'},
          ],
          'dimensionFilterClauses': [{
              'filters': [{
                  'dimensionName': 'ga:secondPagePath',
                  'operator': 'REGEXP',
                  'expressions': ['/designers/cases/'],
              }],
          }],
          # NOTE(review): a single request is made and no page token is
          # followed, so rows beyond this page size are not loaded -- confirm
          # that 2000 is enough for the chosen date range.
          'pageSize': '2000',
      }]
      response = get_report(analytics, body)
      rows = print_response(response)

      for row in rows:
          row['orig'] = _landing_prefix(row['ga:landingPagePath'])

      print('inserting.....')
      for row in rows:
          try:
              table.insert(row)
          except Exception:
              # Best effort: keep loading the remaining rows, but show what
              # went wrong instead of hiding it behind a bare message.
              print('exception')
              traceback.print_exc()
      db.commit()


  if __name__ == '__main__':
      main()