瀏覽代碼

Merge branch 'master' of http://git.choozmo.com:3000/choozmo/kw_tools into master

jared 3 年之前
父節點
當前提交
7c319464b1
共有 4 個文件被更改,包括 325 次插入4 次删除
  1. 149 0
      click_choozmo/click_job_choozmo.py
  2. 4 4
      monitor/GA_Daily.py
  3. 160 0
      monitor/GA_daily_choozmo.py
  4. 12 0
      monitor/spread2.json

+ 149 - 0
click_choozmo/click_job_choozmo.py

@@ -0,0 +1,149 @@
+#import redis
+import time
+#import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+import codecs
+import random
+import requests
+import time
+import sys
+import docker
+import codecs
+import random
+import os
+import time
+
+
+# Shared Selenium WebDriver handle. It starts unset; re_get_webdriver() assigns
+# it (or resets it to None on failure) and the query helpers read it.
+# NOTE(review): the `headers` dict defined right below embeds a LINE Notify
+# bearer token in source control -- rotate it and load it from configuration.
+driver=None
+headers = {
+        "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+
+
+def send_msg(kw):
+    """Post a "processing keyword" notice for ``kw`` to LINE Notify.
+
+    The text is sent as the ``message`` query parameter, authenticated with
+    the module-level ``headers``. The HTTP response is not inspected.
+    """
+    notify_url = "https://notify-api.line.me/api/notify"
+    payload = {"message": "處理關鍵字: "+kw}
+    requests.post(notify_url, headers=headers, params=payload)
+
+
+def empty_query(q):
+    """Open the Google results page for ``q`` in the shared driver, then pause 3 s."""
+    search_url = 'https://www.google.com/search?q={}'.format(urllib.parse.quote(q))
+    driver.get(search_url)
+    time.sleep(3)
+
+
+def process_query(qs):
+    """Search Google for a keyword and click the first result on the target domain.
+
+    Args:
+        qs: A ``(keyword, page, domain)`` sequence. ``page`` is how many times
+            the "next page" link is followed before results are scanned
+            (``None`` counts as 0); ``domain`` is a substring looked for in
+            each result's ``href``.
+
+    Uses the module-level ``driver``. Lookup failures (for example a missing
+    "next page" link) propagate to the caller as Selenium exceptions.
+    """
+    print('processing...')
+    print(qs)
+    q, cnt, domain = qs[0], qs[1], qs[2]
+    driver.get('https://www.google.com/search?q='+urllib.parse.quote(q))
+    time.sleep(3)
+
+    # The find_element_by_* helpers were removed in Selenium 4.3;
+    # find_element(By.XPATH, ...) is available on both 3.x and 4.x.
+    for _ in range(int(cnt or 0)):
+        next_link=driver.find_element(By.XPATH, "//a[@id='pnnext']")
+        webdriver.ActionChains(driver).move_to_element(next_link).perform()
+        webdriver.ActionChains(driver).move_to_element(next_link).click().perform()
+        # Wait after every page change so the next lookup does not run
+        # against a page that is still loading.
+        time.sleep(2)
+
+    links=driver.find_elements(By.XPATH, "//div[@class='g']//div[@class='yuRUbf']//a")
+    for link in links:
+        href=link.get_attribute('href')
+        # Only anchors with more than 10 characters of visible text are considered.
+        if len(link.text)<=10:
+            continue
+        # get_attribute may return None; skip such anchors instead of raising.
+        if href and domain in href:
+            webdriver.ActionChains(driver).move_to_element(link).perform()
+            webdriver.ActionChains(driver).move_to_element(link).click().perform()
+            break
+
+
+
+def re_get_webdriver():
+    """Restart the Selenium container(s) and reconnect the shared ``driver``.
+
+    Every container listed by the Docker client whose name contains ``p4444``
+    is restarted, then a new Chrome session is requested from the WebDriver
+    hub at ``http://127.0.0.1:4444/wd/hub``. On success the module-level
+    ``driver`` holds the new session; on failure it is set to ``None``.
+
+    Returns:
+        None in both cases; callers check ``driver`` instead.
+    """
+    global driver
+    client = docker.from_env()
+    containers=client.containers.list()
+    print(containers)
+    for container in containers:
+        print(container.name)
+        if 'p4444' in container.name:
+            container.restart()
+            print('restarted')
+            # Presumably gives the restarted container time to come back up.
+            time.sleep(6)
+        else:
+            print('name not correct')
+
+    try:
+        print('trying 4444....')
+        options = webdriver.ChromeOptions()
+        # Pass the options object built above instead of constructing a second one.
+        driver = webdriver.Remote(
+            command_executor='http://127.0.0.1:4444/wd/hub',options=options)
+        time.sleep(2)
+        print('4444 done')
+        driver.set_window_size(1400,1000)
+        print('driver is fine')
+    except Exception as exc:
+        # Catch Exception rather than using a bare except so Ctrl-C and
+        # SystemExit still stop the script, and report what went wrong.
+        print('driver except', exc)
+        driver=None
+    return None
+
+
+
+
+
+
+def run_once(url):
+    """Run one click job, reconnecting the browser first when needed.
+
+    Args:
+        url: Despite the name, the ``(keyword, page, domain)`` job tuple that
+            is handed to ``process_query``.
+
+    The WebDriver is rebuilt when it is missing, and also whenever
+    ``random.randint(0, 20)`` draws 3 or less. If no driver can be obtained
+    the job is skipped. Errors raised by ``process_query`` are reported and
+    swallowed so the caller's loop keeps running.
+    """
+    global driver
+    if random.randint(0,20)<=3 or driver is None:
+        re_get_webdriver()
+    if driver is None:
+        print('driver is none')
+        return
+    try:
+        process_query(url)
+    except Exception as exc:
+        # Exception (not a bare except) keeps Ctrl-C working, and printing the
+        # error makes failures diagnosable.
+        print('process_query exception', exc)
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+# NOTE(review): the connection string on the line above embeds the database
+# password in source control -- rotate it and load it from configuration.
+
+# Read the current category from `cur_category`, falling back to 'choozmo'
+# when the table is empty.
+# NOTE(review): `category` is not used below -- the job query hard-codes
+# "choozmo". Confirm which of the two is intended.
+cursor=db.query('select category from cur_category')
+category='choozmo'
+for c in cursor:
+    category=c['category']
+    break
+
+
+# Load every click job for the "choozmo" category as (kw, page, domain) tuples.
+cursor=db.query('select kw,page,domain from seo_clickjobs where category="choozmo" order by rand()')
+lst=[(c['kw'],c['page'],c['domain']) for c in cursor]
+
+# random.choice() raises IndexError on an empty list, so stop with a clear
+# message when there is nothing to do.
+if not lst:
+    sys.exit('no click jobs found for category "choozmo"')
+
+# Run randomly chosen jobs forever.
+while True:
+    l=random.choice(lst)
+    run_once( l )
+    time.sleep(0.001)

+ 4 - 4
monitor/GA_Daily.py

@@ -141,10 +141,10 @@ def main():
 
   result=[]
   for elmt in ga_dict:
-      print(elmt)
-      hour = datetime.now().strftime('%H')
-      if int(hour)+1 > 8 :
-      	send_msg(elmt['ga:pageviews'])
+    print(elmt)
+    hour = datetime.now().strftime('%H')
+    if int(hour)+1 > 8 :
+      send_msg(elmt['ga:pageviews'])
 #      result.append(elmt)
   print('inserting.....')
 

+ 160 - 0
monitor/GA_daily_choozmo.py

@@ -0,0 +1,160 @@
+#!/usr/bin/python3
+import sys
+import codecs
+import traceback
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+import threading
+from googleapiclient.discovery import build
+from oauth2client.service_account import ServiceAccountCredentials
+import dataset
+from datetime import datetime
+
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
+# Empty the ga_pagepath table, then open a transaction on the connection.
+# NOTE(review): nothing visible in this file commits or rolls back the
+# transaction started by db.begin(), and `table` is not used afterwards --
+# confirm both are intended.
+# NOTE(review): the connection string on the line above embeds the database
+# password in source control -- rotate it and load it from configuration.
+db.query('delete from ga_pagepath')
+db.begin()
+
+table=db['ga_pagepath']
+
+# OAuth scope, service-account key file and Google Analytics view used by the
+# reporting calls below. The key file path is relative, so it is resolved
+# against the working directory the script is started from.
+# NOTE(review): spread2.json is committed in this same change together with its
+# private key -- revoke that key and keep the file out of the repository.
+SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
+KEY_FILE_LOCATION = 'spread2.json'#'/root/src/kw_tools/monitor/choozmo-ga-beee24b7a4c1.json' #'c:\\keys\\choozmo-ga-beee24b7a4c1.json'
+VIEW_ID = '188916214'
+
+# line notify header
+headers = {
+        "Authorization": "Bearer " + "WekCRfnAirSiSxALiD6gcm0B56EejsoK89zFbIaiZQD",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+headers2 = {
+        "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+# Hourly page-view targets as [hour, target] pairs; filled in by creat_table().
+com_table = []
+def creat_table():
+  """Append the 25 [hour, target] rows to ``com_table`` and print the table.
+
+  Hours 0-23 get a target of ``2650 * hour``; hour 24 gets 62000.
+  """
+  com_table.extend([hour, 2650*hour] for hour in range(24))
+  com_table.append([24,62000])
+  print(com_table)
+
+def send_msg(kw):
+  """Send the current page-view count and its target status to LINE Notify.
+
+  Args:
+    kw: The page-view count as a string of digits; it is both converted
+      with ``int()`` for the comparison and concatenated into the message.
+
+  The count is compared with the ``com_table`` target for slot
+  ``current hour + 1``; the message reports "是" when the count is above the
+  target and "否" otherwise (including when ``com_table`` has no such slot).
+  The same message is posted with ``headers`` and ``headers2`` and both
+  response bodies are printed.
+  """
+  # Read the clock once so the timestamp and the hour slot cannot disagree.
+  now = datetime.now()
+  current_time = now.strftime('%Y-%m-%d %H:%M:%S')
+  slot = now.hour + 1
+
+  # Look the slot up directly. The hour is always 0-23, so the former
+  # "hour == 24" branch could never run and has been dropped.
+  targets = {row[0]: row[1] for row in com_table}
+  complet="否"
+  if slot in targets:
+    print(slot)
+    if int(kw) > targets[slot]:
+      complet="是"
+
+  params = {"message": "\nchoozmo \n現在時間: " + current_time + "\n當前pageViews: "+kw + "\n是否達標: " + complet}
+  print(params)
+  r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
+  r2 = requests.post("https://notify-api.line.me/api/notify",headers=headers2, params=params)
+  print(r.text)
+  print(r2.text)
+
+
+def initialize_analyticsreporting():
+  """Build an authorized Analytics Reporting API V4 service object.
+
+  Credentials are loaded from the service-account key file at
+  ``KEY_FILE_LOCATION`` for the scopes listed in ``SCOPES``.
+
+  Returns:
+    The service object created by ``build('analyticsreporting', 'v4', ...)``.
+  """
+  return build(
+      'analyticsreporting',
+      'v4',
+      credentials=ServiceAccountCredentials.from_json_keyfile_name(
+          KEY_FILE_LOCATION, SCOPES),
+  )
+
+
+def get_report(analytics,body):
+  """Run a ``reports().batchGet`` call and return its response.
+
+  Args:
+    analytics: An authorized Analytics Reporting API V4 service object.
+    body: The report request definitions, sent as ``reportRequests``.
+  Returns:
+    The Analytics Reporting API V4 response.
+  """
+  request = analytics.reports().batchGet(body={'reportRequests': body})
+  return request.execute()
+
+
+def print_response(response):
+  """Flatten an Analytics Reporting API V4 response into one dict per row.
+
+  Args:
+    response: An Analytics Reporting API V4 response.
+  Returns:
+    A list with one dict per data row across all reports. Each dict maps
+    the row's dimension headers and metric names to their values; when a
+    row carries several date ranges, later ranges overwrite earlier ones.
+    The list is empty when the response has no rows.
+  """
+  result=[]
+  for report in response.get('reports', []):
+    columnHeader = report.get('columnHeader', {})
+    dimensionHeaders = columnHeader.get('dimensions', [])
+    metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
+    metricNames = [metricHeader.get('name') for metricHeader in metricHeaders]
+
+    for row in report.get('data', {}).get('rows', []):
+      ga_dict=dict(zip(dimensionHeaders, row.get('dimensions', [])))
+      for values in row.get('metrics', []):
+        ga_dict.update(zip(metricNames, values.get('values', [])))
+      # Append once per row, not once per date range, so the same dict does
+      # not show up several times in the result.
+      result.append(ga_dict)
+  # Return only after every report has been processed; returning inside the
+  # loop dropped all reports after the first and yielded None for a response
+  # without reports.
+  return result
+
+def main():
+  """Fetch today's page views from Google Analytics and report them.
+
+  Requests the ``ga:pageviews`` metric for the current date on ``VIEW_ID``
+  and passes each returned row's value to ``send_msg``. Notifications are
+  only sent from 08:00 onward.
+  """
+  analytics = initialize_analyticsreporting()
+  today = datetime.now().strftime('%Y-%m-%d')  # today's date, used as start and end
+  body=[{
+    'viewId': VIEW_ID,
+    'dateRanges': [{'startDate': today, 'endDate': today}],
+    'metrics': [{'expression': 'ga:pageviews'}],
+    'pageSize': '100',
+  }]
+
+  response = get_report(analytics,body)
+  # Guard against a None result so an empty response means "no
+  # notifications" instead of a TypeError in the loop below.
+  rows = print_response(response) or []
+
+  for elmt in rows:
+    print(elmt)
+    # Same cut-off as the former `int(hour)+1 > 8` check.
+    if datetime.now().hour >= 8:
+      send_msg(elmt['ga:pageviews'])
+  print('inserting.....')
+
+# Script entry point: build the hourly target table before main() runs,
+# because send_msg() reads com_table.
+if __name__ == '__main__':
+  creat_table()
+  main()

+ 12 - 0
monitor/spread2.json

@@ -0,0 +1,12 @@
+{
+  "type": "service_account",
+  "project_id": "dstest-1-292707",
+  "private_key_id": "41b3cec48b4af2e91b89cf6c1644b2fbdf603a72",
+  "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC47O9j5gNkeWsY\n7XsBwl08z/pLNKrKPq8zYoI2tlnctHAOowjSkwXRAuERUeY+O4cRn8HNRqtnZ5M/\nt3kLEtmQpQClk7A8pv21bo4lT8nILZLWU1ovo3bzFNaBN7I/1PNUzSm8G5w4aUSy\noO2/beQLoz0gs1e1WlLAQVkS4NvUFagP/nY3sAgW/1mZSeNsHk8x8VLc1t6rIlSl\n9wPQ52KBCTUUu6gveQ5nzpOZ3eNxr+ftllF378tt/tzCqFdI0DkYYv7jxSAfXa+y\nmlHoSE8mhd+M4hvrf4E5jjvdfyNmALiBpyEW2YAMe1qK0Ay3aUodZxSIKCU8FYo2\njIVCBxTtAgMBAAECggEAVUdafECx1s9RbrzxaVXHJoiona7rhfnTVMh8URvVo/yH\n4pAXvPf1CjagMRsKKS/NcXixaGdLGxP+KTeEd/FY6KXW/wR1FPtTa6xQm+9IF+rA\nWNs2b1qcO6wj4ZIcPuiG0FgOg4NeDvuopRGmog1cyWsdgOuDqou9NpDMMXMFnS8Y\nivS3P1pKXSbg7XKQ4mCWfQk5Oq8Uf3OrOOQncFvUazWQDnhI7GZ9yLw+pqCeziWv\nUeGql2EKtjEsOj9zaN4AeGoSyZDcgVo3X4XRR+sq/Jqd2D8m0aQmlMEcEIVLPmtH\nkugDJsS+Yuk7YQFX4pMbKOCYlO2XCX1STPAn+ytxFwKBgQDlkMG8dvCAn3qoTjlh\n4qj8AHlcyZghcR9Ru1qnSZbw7wVBiTVD8L6mEiLmEGumkNHnwnE/s0PuU10POXeu\nrD4AYtLnJ5f42ycsbBw+ncc1qCwqBwdx4Vo0QXNBs4S+zmf6M8fuzT5wWnaHP5Tg\nbdKByPlpIoGwy/7XseRi3qkQLwKBgQDOOEK3fZ+PQ7PBCGw8DcfWXL4dsUoa5KuC\ncZbkE+sOyGAGBwN+UvxxLQqkSLDtuki6t9+9DvtkfY5N79JN5fGId+BBg8HdhCI+\nxKElGj52f1lcukhMJ3zhLsbUFWz0UFJDudG+qaUuqHRjVCLRJTzlTrvqjHtYLwyO\nQPTi5QnpowKBgQDlURbWZpGUSrrCCXH0v/BB209gSti2/0Nj552E4lPvVTSQ5Lja\np1AqoI9P9jMy7hNgSbHLCg3fslKRdLyDNfexdwZqdfivVGvrSgtk2UM37EhBq0fa\nkFwFOyQhC2ydFZ50JumfOFMY9KTWMcNL9SiFEPdj+F5I914YpNEZmoaTbQKBgF+8\nXLTEvEONYbD20RCcMS8CRTyRpt6PVFQtmahu2sw1F+cUcHm/2vRLvcoA+SqUNdmB\nLXyerPS9GUhzUsXZP2VkiZbArUrCYgeTz1/jLCZk/r5+uLuqBV6hEas5+yf89gP9\nCzOhnE7p44aNc9B2oiuufqzn5QdOaFzOKSIAxLZTAoGBAIicGUmg/FPXj9TC7/8E\nRX7TBFEmJOt+cQNCQZ2KLJD4Io1v7tISjyv2dkYxQZ2tMRE3uOniphyAJhSypseL\naDGyd4LMEkRp6Tazg71T3nepb10MH4pWsvc0O5bXxxEyLeaF+1gWdN3TMALv3B3H\nqcxxFjhWQ492akdnAKYRUgGf\n-----END PRIVATE KEY-----\n",
+  "client_email": "service@dstest-1-292707.iam.gserviceaccount.com",
+  "client_id": "118117667194503067224",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/service%40dstest-1-292707.iam.gserviceaccount.com"
+}