瀏覽代碼

Merge branch 'master' of http://git.choozmo.com:3000/choozmo/kw_tools

Mia 3 年之前
父節點
當前提交
ed50658eaf

+ 20 - 0
.favorites.json

@@ -0,0 +1,20 @@
+[
+    {
+        "type": "File",
+        "name": "c:\\gitlab\\kw_tools\\kw_tools\\hhh\\reports\\content_gap_ranking.py",
+        "parent_id": null,
+        "fsPath": "c:\\gitlab\\kw_tools\\kw_tools\\hhh\\reports\\content_gap_ranking.py",
+        "workspaceRoot": null,
+        "workspacePath": null,
+        "id": "xPWkXaQUpezJ3ISo"
+    },
+    {
+        "type": "File",
+        "name": "c:\\gitlab\\kw_tools\\kw_tools\\choozmo\\gsearch_selenium.py",
+        "parent_id": null,
+        "fsPath": "c:\\gitlab\\kw_tools\\kw_tools\\choozmo\\gsearch_selenium.py",
+        "workspaceRoot": null,
+        "workspacePath": null,
+        "id": "uQeJKy35vHfLxa6P"
+    }
+]

+ 5 - 4
choozmo/clickbot.py

@@ -87,7 +87,7 @@ desired_capabilities=options.to_capabilities())
 #desired_capabilities=DesiredCapabilities.CHROME)
 driver.set_window_size(1400,1000)
 
-#qlist=['大進設計 583','大進設計 免費專線','大進設計 林函青','大進設計 hhh','大進設計 林函青設計師 hhh','大進空間設計 hhh','林函青 hhh']
+qlist=['大進設計 583','大進設計 免費專線','大進設計 林函青','大進設計 hhh','大進設計 林函青設計師 hhh','大進空間設計 hhh','林函青 hhh']
 #qlist=['AI Spokesgirl','choozmo ai spokesgirl','choozmo ai 主播','choozmo 國際代言人','choozmo 國際主播','choozmo ai短影片','choozmo 短影片','AI虛擬主播 技術 choozmo','choozmo ai短影片','choozmo 虛擬人','choozmo 虛擬播報員','choozmo ai video','choozmo virtual presenter','choozmo virtual avatar','choozmo ai spokesperson']
 #qlist+=['ai seowriter choozmo','gpt3 choozmo','蝦皮 seo site:ai.choozmo.com','vtuber choozmo','seo site:ai.choozmo.com','台灣第一位ai主播','ai主播 2021..2021','AI主播韓小夏 choozmo','ai spokesgirl','ai主播 韓小夏 choozmo','主播 韓小夏 choozmo','ai spokesgirl 集仕多','ai 合成 主播 choozmo','台灣AI主播 choozmo','台灣 ai主播  choozmo']
 #qlist+=['ai seowriter choozmo','gpt3 choozmo','蝦皮 seo site:ai.choozmo.com','vtuber choozmo','seo site:ai.choozmo.com','youtube seo choozmo','video marketing choozmo','deepmind choozmo']
@@ -96,10 +96,11 @@ driver.set_window_size(1400,1000)
 #qlist+=[('local seo choozmo',0),('video production choozmo',0),('Google Slides choozmo',0),('video production choozmo',0),('絆愛 vtuber choozmo',0),('vtuber 鯊魚 choozmo',0),('ai unicorn choozmo',0),('韓國 ai 主播 choozmo',0)]
 #qlist+=[('seo 推薦 choozmo',0),('choozmo seo 推薦',0),('台灣 vtuber choozmo',0),('vtuber choozmo',0),('台灣 choozmo',0),('Anthony Bourdain choozmo',0),('角巻 choozmo',0),('竹北 seo choozmo',0)]
 
-qlist=[('ai 主播',2,'choozmo.com'),('ai主播',2,'choozmo.com'),('台灣 vtuber choozmo',0,'choozmo.com'),('vtuber choozmo',0,'choozmo.com'),('vtuber 是什麼 choozmo',0,'choozmo.com'),('vtuber site:choozmo.com',0,'choozmo.com')]
-qlist+=[('seo 推薦 choozmo',0,'choozmo.com'),('蝦皮 seo site:ai.choozmo.com',0,'choozmo.com'),('ppt to mp4 choozmo',0,'choozmo.com')]
-qlist+=[('預售屋客變教學',0,'hhh.com.tw'),('預售屋客變',0,'hhh.com.tw'),('預售屋客變流程',0,'hhh.com.tw'),('預售屋客變省錢',0,'hhh.com.tw'),('所有的地方都能進行客變嗎',0,'hhh.com.tw'),('什麼是客變',0,'hhh.com.tw')]
+#qlist=[('ai 主播',2,'choozmo.com'),('ai主播',2,'choozmo.com'),('台灣 vtuber choozmo',0,'choozmo.com'),('vtuber choozmo',0,'choozmo.com'),('vtuber 是什麼 choozmo',0,'choozmo.com'),('vtuber site:choozmo.com',0,'choozmo.com')]
+#qlist+=[('seo 推薦 choozmo',0,'choozmo.com'),('蝦皮 seo site:ai.choozmo.com',0,'choozmo.com'),('ppt to mp4 choozmo',0,'choozmo.com')]
+#qlist+=[('預售屋客變教學',0,'hhh.com.tw'),('預售屋客變',0,'hhh.com.tw'),('預售屋客變流程',0,'hhh.com.tw'),('預售屋客變省錢',0,'hhh.com.tw'),('所有的地方都能進行客變嗎',0,'hhh.com.tw'),('什麼是客變',0,'hhh.com.tw')]
 
+#qlist=[('大欣室內裝修設計',0,'hhh.com.tw'),('宋茂松',0,'hhh.com.tw'),('大欣室內裝修設計 宋茂松',0,'hhh.com.tw'),('大欣室內設計',0,'hhh.com.tw'),('大欣室內裝修',0,'hhh.com.tw'),('大欣室內裝修',0,'hhh.com.tw'),('大欣室內裝修影音',0,'hhh.com.tw')]
 #qlist=[('居家風水',0),('風水',2)]
 
 #qlist+=[('DHIA 黃靜文',0,'hhh.com.tw'),('藏風空間設計',1,'hhh.com.tw'),('天花板設計',1,'hhh.com.tw'),('陽台佈置',0,'hhh.com.tw'),('設計師',0,'hhh.com.tw'),('玄關設計',0,'hhh.com.tw'),('廚房設計',0,'hhh.com.tw'),('風水',2,'hhh.com.tw'),('住宅風水',1,'hhh.com.tw'),('居家風水',0,'hhh.com.tw'),('居家 風水',0,'hhh.com.tw'),('客廳連廚房',0,'hhh.com.tw'),('前陽台玄關',1,'hhh.com.tw')]

+ 1 - 1
choozmo/db_clickjob.py

@@ -92,7 +92,7 @@ def run_once(q):
     print(q)
     print(q[0])
     process_query(q)
-    send_msg(q[0])
+#    send_msg(q[0])
 #            empty_query(q)
 
     intsleep=random.randint(5,12)

+ 12 - 2
choozmo/gen_clickjob.py

@@ -10,8 +10,18 @@ table=db['seo_clickjobs']
 #cursor=db.query('select id,kw from gtrends where name = "'+site+'" order by rand() limit 1')
 
 category='hhh-faq'
-
-qlist=[('預售屋客變教學',0,'hhh.com.tw'),('預售屋客變',0,'hhh.com.tw'),('預售屋客變流程',0,'hhh.com.tw'),('預售屋客變省錢',0,'hhh.com.tw'),('所有的地方都能進行客變嗎',0,'hhh.com.tw'),('什麼是客變',0,'hhh.com.tw')]
+qlist=[('宇拓室內裝修',2,'hhh.com.tw'),('宇拓室內裝修設計',1,'hhh.com.tw'),('宇拓室內裝潢',2,'hhh.com.tw'),('宇拓空間規劃',1,'hhh.com.tw'),('宇拓室內設計師',1,'hhh.com.tw')]
+#qlist=[('對視室內設計工坊',0,'hhh.com.tw'),('對視室內設計',1,'hhh.com.tw'),('對視室內設計工程',1,'hhh.com.tw'),('對視室內裝修',0,'hhh.com.tw'),('對視室內裝潢',0,'hhh.com.tw'),('對視室內設計師',0,'hhh.com.tw'),('對視室內規劃',1,'hhh.com.tw')]
+#qlist=[('合瑪設計工程',0,'hhh.com.tw'),('合瑪設計',1,'hhh.com.tw'),('合瑪設計工程有限公司',1,'hhh.com.tw'),('合瑪裝修',1,'hhh.com.tw'),('合瑪裝潢',1,'hhh.com.tw'),('合瑪空間設計師',0,'hhh.com.tw'),('合瑪空間設計',1,'hhh.com.tw')]
+#qlist=[('肯星設計',1,'hhh.com.tw'),('肯星設計費用',1,'hhh.com.tw'),('肯星設計師',1,'hhh.com.tw'),('肯星設計工程',1,'hhh.com.tw')]
+#qlist=[('筑新裝修工程',0,'hhh.com.tw'),('筑新裝修',0,'hhh.com.tw'),('筑新裝潢',2,'hhh.com.tw'),('筑新',1,'hhh.com.tw'),('筑新設計',0,'hhh.com.tw'),('筑新空間設計',1,'hhh.com.tw'),('筑新工程',1,'hhh.com.tw')]
+#qlist=[('WED木象室內設計',0,'hhh.com.tw'),('WED木象',0,'hhh.com.tw'),('WED木象室內裝修',0,'hhh.com.tw'),('WED木象室內裝潢',0,'hhh.com.tw'),('WED木象室內設計 台北',0,'hhh.com.tw'),('WED木象室內空間',0,'hhh.com.tw'),('WED木象室內裝潢工程',0,'hhh.com.tw'),('WED木象空間規劃',0,'hhh.com.tw')]
+#qlist=[('東江齋設計 River Cabin D+',0,'hhh.com.tw'),('東江齋設計',0,'hhh.com.tw'),('東江齋',1,'hhh.com.tw'),('東江齋裝修',2,'hhh.com.tw'),('東江齋空間',0,'hhh.com.tw'),('東江齋空間設計',0,'hhh.com.tw')]
+#qlist=[('澤物設計裝修',2,'hhh.com.tw'),('孫詩哲',2,'hhh.com.tw'),('澤物室內裝潢 高雄',1,'hhh.com.tw'),('澤物設計 孫詩哲',2,'hhh.com.tw'),('澤物室內設計 高雄',2,'hhh.com.tw')]
+#qlist=[('翊程設計',0,'hhh.com.tw'),('翊程設計公司',0,'hhh.com.tw'),('翊程室內裝修工程有限公司',1,'hhh.com.tw'),('翊程',0,'hhh.com.tw'),('翊程室內裝潢',0,'hhh.com.tw'),('翊程室內裝潢',0,'hhh.com.tw'),('翊程設計團隊',0,'hhh.com.tw'),('翊程設計裝修',0,'hhh.com.tw')]
+#qlist=[('迪品空間設計',0,'hhh.com.tw'),('迪品空間',1,'hhh.com.tw'),('迪品空間裝修',1,'hhh.com.tw'),('施芷穎',0,'hhh.com.tw'),('迪品空間設計 施芷穎',0,'hhh.com.tw'),('迪品空間設計 總監',0,'hhh.com.tw'),('alaia 施芷穎',0,'hhh.com.tw'),('台中 迪品空間設計',0,'hhh.com.tw')]
+#qlist=[('大欣室內裝修設計',0,'hhh.com.tw'),('宋茂松',0,'hhh.com.tw'),('大欣室內裝修設計 宋茂松',0,'hhh.com.tw'),('大欣室內設計',0,'hhh.com.tw'),('大欣室內裝修',0,'hhh.com.tw'),('大欣室內裝修',0,'hhh.com.tw'),('大欣室內裝修影音',0,'hhh.com.tw')]
+#qlist=[('預售屋客變教學',0,'hhh.com.tw'),('預售屋客變',0,'hhh.com.tw'),('預售屋客變流程',0,'hhh.com.tw'),('預售屋客變省錢',0,'hhh.com.tw'),('所有的地方都能進行客變嗎',0,'hhh.com.tw'),('什麼是客變',0,'hhh.com.tw')]
 
 for q in qlist:
     table.insert({'kw':q[0],'page':q[1],'domain':q[2],'category':category})

+ 18 - 0
choozmo/gen_cur_category.py

@@ -0,0 +1,18 @@
+import time
+import json
+import random
+import dataset
+#r = redis.Redis(host='db.ptt.cx', port=6379, db=1)
+#p = r.pubsub(ignore_subscribe_messages=True)
+
+# Connect to the `seo` MySQL database.
+# NOTE(review): the credentials are hard-coded in this URL - consider moving them to configuration.
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+table=db['cur_category']
+#cursor=db.query('select id,kw from gtrends where name = "'+site+'" order by rand() limit 1')
+
+# Category name to store in the row written below.
+category='hhh-faq'
+
+# Insert the row with id=1, or update it if it already exists (rows are matched on `id`).
+table.upsert({'category':category,'id':1},keys=['id'])
+
+#table.insert({'category':category},)
+
+

+ 10 - 0
choozmo/httpservertest.py

@@ -0,0 +1,10 @@
+import http.server
+import socketserver
+
+# TCP port the test server listens on.
+PORT = 80
+
+# SimpleHTTPRequestHandler serves files relative to the current working directory.
+Handler = http.server.SimpleHTTPRequestHandler
+
+# An empty host string binds to all interfaces; serve_forever() blocks until interrupted.
+with socketserver.TCPServer(("", PORT), Handler) as httpd:
+    print("serving at port", PORT)
+    httpd.serve_forever()

+ 83 - 0
choozmo/igool/igtree.py

@@ -0,0 +1,83 @@
+import suggests
+import networkx as nx
+import pyvis
+import time
+from pyvis.network import Network
+import pickle
+
+
+#kw='覆髓'
+#kw='塗氟'
+#kw='口掃機'
+#kw='牙醫助理'
+#kw='牙材'
+#kw='牙醫師公會'
+#kw='防齲'
+#kw='齒模'
+#kw='金屬牙套'
+#kw='醫療法'
+#kw='牙醫師手冊'
+#kw='貝氏刷牙'
+#kw='牙醫積分'
+#kw='牙醫師'
+#kw='牙醫全聯會'
+#kw='牙醫系'
+#kw='台大牙醫'
+#kw='成大牙醫'
+#kw='陽明牙醫'
+#kw='北醫牙醫'
+#kw='醫學系公費生'
+#kw='醫學系自費生'
+#kw='北醫牙醫'
+
+#kw='牙醫學會'
+#kw='牙醫總額'
+#kw='牙醫健保'
+#kw='文化資產'
+#kw='藝文團體'
+#kw='書房 設計'
+#kw='室內設計'
+#kw='2021風水擺設'
+#kw='電視牆'
+#kw='系統櫃'
+#kw='收納'
+#kw='軟糖'
+#kw='手工餅乾'
+#kw='白巧克力'
+kw='黑巧克力'
+
+#kw='生巧克力'
+#kw='牛奶巧克力'
+
+#kw='廣告投放策略'
+
+# Fetch the first-level Google suggestions for the seed keyword `kw`.
+#s={'suggests':[]}
+s = suggests.suggests.get_suggests(kw, source='google')
+
+# Load the graph accumulated by earlier runs; start from an empty graph when
+# gs2.p does not exist yet (first run).
+# NOTE: gs2.p is a pickle written by this script itself - never load a pickle
+# that comes from an untrusted source.
+try:
+    with open("gs2.p", "rb") as fh:
+        G = pickle.load(fh)
+except FileNotFoundError:
+    G = nx.Graph()
+
+
+#G.remove_node('巧克力囊腫')
+#G.remove_node('巧克力雲莊')
+
+for sg in s['suggests']:
+    G.add_edge(kw,sg,weight=1)
+
+    print(sg)
+    # Pause between requests to the suggestion service.
+    time.sleep(1)
+    # Second level: suggestions of each suggestion.
+    s2 = suggests.suggests.get_suggests(sg, source='google')
+    for elmt in s2['suggests']:
+        G.add_edge(sg,elmt,weight=1)
+
+# Drop nodes without edges and edges that connect a node to itself.
+G.remove_nodes_from(list(nx.isolates(G)))
+G.remove_edges_from( list(nx.selfloop_edges(G)))
+
+# Persist the graph; the context manager guarantees the file is flushed and closed.
+with open("gs2.p", "wb") as fh:
+    pickle.dump(G, fh)
+
+# Render the graph as an interactive HTML page.
+pyG = Network(height="750px", width="100%",bgcolor="#333333",font_color="white")
+pyG.from_nx(G)
+pyG.show('gs.html')
+
+
+
+

+ 5 - 0
ghostdriver.log

@@ -0,0 +1,5 @@
+[INFO  - 2021-10-30T11:25:21.804Z] GhostDriver - Main - running on port 60508
+[INFO  - 2021-10-30T11:25:23.915Z] Session [12fa0ed0-3974-11ec-a102-efe1094567bd] - page.settings - {"XSSAuditingEnabled":false,"javascriptCanCloseWindows":true,"javascriptCanOpenWindows":true,"javascriptEnabled":true,"loadImages":true,"localToRemoteUrlAccessEnabled":false,"userAgent":"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1","webSecurityEnabled":true}
+[INFO  - 2021-10-30T11:25:23.916Z] Session [12fa0ed0-3974-11ec-a102-efe1094567bd] - page.customHeaders:  - {}
+[INFO  - 2021-10-30T11:25:23.916Z] Session [12fa0ed0-3974-11ec-a102-efe1094567bd] - Session.negotiatedCapabilities - {"browserName":"phantomjs","version":"2.1.1","driverName":"ghostdriver","driverVersion":"1.2.0","platform":"windows-10-32bit","javascriptEnabled":true,"takesScreenshot":true,"handlesAlerts":false,"databaseEnabled":false,"locationContextEnabled":false,"applicationCacheEnabled":false,"browserConnectionEnabled":false,"cssSelectorsEnabled":true,"webStorageEnabled":false,"rotatable":false,"acceptSslCerts":false,"nativeEvents":true,"proxy":{"proxyType":"direct"},"phantomjs.page.settings.javascriptEnabled":true}
+[INFO  - 2021-10-30T11:25:23.916Z] SessionManagerReqHand - _postNewSessionCommand - New Session Created: 12fa0ed0-3974-11ec-a102-efe1094567bd

File diff suppressed because it is too large
+ 0 - 0
gs.html


二進制
gs2.p


+ 1 - 1
hhh/GA_DB_KW_to_Sheep.py

@@ -32,7 +32,7 @@ def save_sheet(df,filename,tabname,startpos='A1'):
 
 def do_jobs():
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
-    cursor=db.query("SELECT k1.query,k1.clicks,k1.impressions,k1.ctr,(k2.clicks-k1.clicks)/k1.clicks*100 as growth FROM hhh.hhh_weekly_keywords k1, hhh_weekly_keywords k2 where k1.weeknum='27' and k2.weeknum='28' and k1.query=k2.query order by (k2.clicks-k1.clicks)/k1.clicks*100 desc;")
+    cursor=db.query("SELECT k1.query,k1.clicks,k1.impressions,k1.ctr,(k2.clicks-k1.clicks)/k1.clicks*100 as growth FROM hhh.hhh_weekly_keywords k1, hhh_weekly_keywords k2 where k1.weeknum='28' and k2.weeknum='29' and k1.query=k2.query order by (k2.clicks-k1.clicks)/k1.clicks*100 desc;")
     df = pd.DataFrame(columns=('query','clicks','impressions','ctr','growth'))
 
     idx=0

+ 149 - 0
hhh/GA_flow.py

@@ -0,0 +1,149 @@
+"""Hello Analytics Reporting API V4."""
+#!/usr/bin/python3
+import sys
+import codecs
+import traceback
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import dataset
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+import threading
+from apiclient.discovery import build
+from oauth2client.service_account import ServiceAccountCredentials
+import dataset
+
+# Connect to the `hhh` MySQL database.
+# NOTE(review): the credentials are hard-coded in this URL - consider moving them to configuration.
+db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
+# Runs as soon as this module is loaded: empties hhh_view_bounce before main() refills it.
+db.query('delete from hhh_view_bounce')
+#db.query('delete from hhh_weekly_affinity')
+
+# Table that main() inserts the report rows into.
+table=db['hhh_view_bounce']
+
+# OAuth scope, service-account key file and Analytics view used for the report.
+SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
+KEY_FILE_LOCATION = 'c:\\keys\\choozmo-ga-beee24b7a4c1.json'
+VIEW_ID = '188916214'
+
+
+def initialize_analyticsreporting():
+  """Build an authorized Analytics Reporting API V4 service object.
+
+  Credentials are read from the service-account key file at
+  KEY_FILE_LOCATION and requested with the scopes in SCOPES.
+
+  Returns:
+    The service object returned by build('analyticsreporting', 'v4', ...).
+  """
+  return build(
+      'analyticsreporting',
+      'v4',
+      credentials=ServiceAccountCredentials.from_json_keyfile_name(
+          KEY_FILE_LOCATION, SCOPES),
+  )
+
+
+def get_report(analytics,body):
+  """Run a batchGet query against the Analytics Reporting API V4.
+
+  Args:
+    analytics: An authorized Analytics Reporting API V4 service object.
+    body: List of report-request dicts, sent as the 'reportRequests' value.
+      Example:
+        [{'viewId': VIEW_ID,
+          'dateRanges': [{'startDate': '14daysAgo', 'endDate': 'today'}],
+          'metrics': [{'expression': 'ga:users'}],
+          'dimensions': [{'name': 'ga:sourceMedium'}]}]
+  Returns:
+    The Analytics Reporting API V4 response.
+  """
+  request = analytics.reports().batchGet(body={'reportRequests': body})
+  return request.execute()
+
+
+def print_response(response):
+  """Flatten an Analytics Reporting API V4 response into a list of dicts.
+
+  Despite its name this function prints nothing; it only builds and returns
+  the flattened rows.
+
+  Args:
+    response: An Analytics Reporting API V4 response.
+  Returns:
+    A list with one dict per (row, date range) over every report in the
+    response. Each dict maps the dimension header names and metric names to
+    the row's values. The list is empty when the response has no reports or
+    no rows.
+  """
+  result=[]
+  for report in response.get('reports', []):
+    columnHeader = report.get('columnHeader', {})
+    dimensionHeaders = columnHeader.get('dimensions', [])
+    metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
+
+    for row in report.get('data', {}).get('rows', []):
+      dimensionValues = dict(zip(dimensionHeaders, row.get('dimensions', [])))
+
+      for values in row.get('metrics', []):
+        # Start from a fresh copy for each date range so that appended
+        # entries never alias (and overwrite) one another.
+        ga_dict=dict(dimensionValues)
+        for metricHeader, value in zip(metricHeaders, values.get('values', [])):
+          ga_dict[metricHeader.get('name')]=value
+        result.append(ga_dict)
+  # Return only after every report has been processed. Returning from inside
+  # the loop dropped all reports after the first and produced None for a
+  # response without reports.
+  return result
+#        print(ga_dict)
+#product-post.php?id=2381
+
+#q=''
+#for i in range (2367,2382):
+#for i in range (2352,2368):
+#for i in range (2352,2363):
+#for i in range (2363,2368):
+##  if i==2381:
+#  if i==2367:
+#  if i==2362:
+
+ #   q+='id='+str(i)
+ # else:
+ #   q+='id='+str(i)+"|"
+
+#print(q)
+#sys.exit()
+
+def main():
+  """Fetch page views and bounce rate per page path and store them in `table`.
+
+  Queries the view VIEW_ID for 2021-01-01 through 2021-09-15, restricted to
+  page paths containing "columns" with more than 20 page views. Each returned
+  row gets the site origin prefixed to its path, is inserted into the
+  module-level `table`, and is printed.
+  """
+  analytics = initialize_analyticsreporting()
+
+  body=[{ 'viewId': VIEW_ID,
+  'dateRanges': [{'startDate': '2021-01-01', 'endDate': '2021-09-15'}],
+#  'filtersExpression': 'ga:bounceRate>20;ga:pageviews>20;ga:pagePath=@column)',
+#  'filtersExpression': 'ga:pageviews>20;ga:pagePath=@columns)',
+  'filtersExpression': 'ga:pagePath=@columns;ga:pageviews>20',
+  'metrics': [{'expression': 'ga:pageviews'},{'expression': 'ga:bounceRate'}],
+  'dimensions': [{'name': 'ga:pagePath'}]
+  }]
+
+  response = get_report(analytics,body)
+
+  # print_response can yield None for a response without reports; treat that
+  # as "no rows" instead of failing on the iteration below.
+  ga_dict=print_response(response) or []
+#  print(ga_dict)
+  for elmt in ga_dict:
+      # The API returns site-relative paths; store absolute URLs.
+      elmt['ga:pagePath']='https://www.hhh.com.tw'+elmt['ga:pagePath']
+      table.insert(elmt)
+      print(elmt)
+
+#        print(elmt['ga:sourceMedium'])
+
+
+if __name__ == '__main__':
+  main()
+

+ 1 - 1
hhh/SEO/GA_Daily.py

@@ -95,7 +95,7 @@ def main():
 #(FB_|facebook|IG_|LINE_|LINEMP_|qsear.ch)
 
   body=[{ 'viewId': VIEW_ID,
-  'dateRanges': [{'startDate': '2021-11-03', 'endDate': '2021-11-03'}],
+  'dateRanges': [{'startDate': '2021-11-05', 'endDate': '2021-11-05'}],
 
   'metrics': [{'expression': 'ga:users'},{'expression': 'ga:newusers'},{'expression': 'ga:sessions'},{'expression': 'ga:pageviews'},{'expression': 'ga:bounceRate'},{'expression': 'ga:pageviewsPerSession'}],
 #  'dimensions': [{'name': 'ga:pagePath'}],

+ 6 - 2
hhh/SEO/curl_click.py

@@ -17,7 +17,11 @@ fr.close()
 #for i in range(20):
 while True:
     l=random.choice(lst)
-    os.system('docker run --rm curlimages/curl:7.79.1 -L -v '+l)
+#    os.system('docker run --rm curlimages/curl:7.79.1 -L -v '+l)
+#    os.system('curl -Lx http://127.0.0.1:8118 '+l)
+    os.system('phantomjs '+l)
+
+
 #    os.system('curl '+l)
-    time.sleep(4)
+#    time.sleep(1)
 print(l)

+ 13 - 0
hhh/SEO/ghostdriver.log

@@ -0,0 +1,13 @@
+[INFO  - 2021-11-01T12:18:04.936Z] GhostDriver - Main - running on port 59680
+[INFO  - 2021-11-01T12:18:09.490Z] Session [c6a2bb90-3b0d-11ec-bdf3-61a532e84cfa] - page.settings - {"XSSAuditingEnabled":false,"javascriptCanCloseWindows":true,"javascriptCanOpenWindows":true,"javascriptEnabled":true,"loadImages":true,"localToRemoteUrlAccessEnabled":false,"userAgent":"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1","webSecurityEnabled":true}
+[INFO  - 2021-11-01T12:18:09.491Z] Session [c6a2bb90-3b0d-11ec-bdf3-61a532e84cfa] - page.customHeaders:  - {}
+[INFO  - 2021-11-01T12:18:09.491Z] Session [c6a2bb90-3b0d-11ec-bdf3-61a532e84cfa] - Session.negotiatedCapabilities - {"browserName":"phantomjs","version":"2.1.1","driverName":"ghostdriver","driverVersion":"1.2.0","platform":"windows-10-32bit","javascriptEnabled":true,"takesScreenshot":true,"handlesAlerts":false,"databaseEnabled":false,"locationContextEnabled":false,"applicationCacheEnabled":false,"browserConnectionEnabled":false,"cssSelectorsEnabled":true,"webStorageEnabled":false,"rotatable":false,"acceptSslCerts":false,"nativeEvents":true,"proxy":{"proxyType":"direct"}}
+[INFO  - 2021-11-01T12:18:09.491Z] SessionManagerReqHand - _postNewSessionCommand - New Session Created: c6a2bb90-3b0d-11ec-bdf3-61a532e84cfa
+[ERROR - 2021-11-01T12:18:13.655Z] Session [c6a2bb90-3b0d-11ec-bdf3-61a532e84cfa] - page.onError - msg: ReferenceError: Can't find variable: WeakMap
+
+  phantomjs://platform/console++.js:263 in error
+[ERROR - 2021-11-01T12:18:13.656Z] Session [c6a2bb90-3b0d-11ec-bdf3-61a532e84cfa] - page.onError - stack:
+  (anonymous function) (https://hhh.com.tw/assets/js/sweetalert2.all.min.js:1)
+  (anonymous function) (https://hhh.com.tw/assets/js/sweetalert2.all.min.js:1)
+
+  phantomjs://platform/console++.js:263 in error

+ 15 - 4
hhh/SEO/gsearch_test.py

@@ -4,6 +4,8 @@ import sys
 import time
 import dataset
 import os
+import datetime
+
 
 
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
@@ -12,16 +14,25 @@ table=db['hhh_contentgap_serp']
 #print(os.path.realpath('.'))
 curdir=os.path.realpath('.')
 
-fr=codecs.open(curdir+os.sep+'contentgap.txt','r','utf-8')
+#fr=codecs.open(curdir+os.sep+'contentgap.txt','r','utf-8')
+fr=codecs.open(curdir+os.sep+'hhh\\seo\\contentgap.txt','r','utf-8')
+
 lines=fr.readlines()
 lst=[]
 for l in lines:
     lst.append(l.replace('\n',''))
 
 
-for l in lst[25:]:
+for l in lst:
+
+#for l in lst[25:]:
+#for l in lst[26:]:
+
     time.sleep(5)
-    results=googlesearch.search(l, num_results=150)
+##    results=googlesearch.search(l, num_results=150,proxy="https://109.173.102.90:8000/")
+    results=googlesearch.search(l, num_results=150,proxy="https://217.147.1.166:40998/")
+#    results=googlesearch.search(l, num_results=150)
+
     idx=0
     for r in results:
         print(idx)
@@ -29,7 +40,7 @@ for l in lst[25:]:
         if 'hhh.com.tw' in r:
             print('found')
             print(idx)
-            table.insert({'kw':l,'ranking':idx})
+            table.insert({'kw':l,'ranking':idx,'dt':datetime.datetime.now()})
             db.commit()
             break
         idx+=1

+ 39 - 0
hhh/SEO/gstest.py

@@ -0,0 +1,39 @@
+from bs4 import BeautifulSoup
+from requests import get
+
+
+def search(term, num_results=10, lang="en", proxy=None):
+    """Return the result links scraped from one Google search results page.
+
+    Args:
+        term: Search query.
+        num_results: Number of results to ask for (one extra is requested).
+        lang: Value of the `hl` (interface language) query parameter.
+        proxy: Optional proxy URL used for the request.
+    Returns:
+        A list of href strings, one per result block that has both a link
+        and an <h3> title.
+    Raises:
+        requests.HTTPError: when Google answers with an error status.
+    """
+    # Local import: only this function needs it.
+    from urllib.parse import quote_plus
+
+    usr_agent = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
+                      'Chrome/61.0.3163.100 Safari/537.36'}
+
+    def fetch_results(search_term, number_results, language_code):
+        # quote_plus percent-encodes reserved and non-ASCII characters (&, #,
+        # CJK text, ...) and turns spaces into '+'. Replacing spaces alone
+        # left the rest of the query able to corrupt the URL.
+        escaped_search_term = quote_plus(search_term)
+
+        google_url = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, number_results+1,
+                                                                              language_code)
+        # requests picks a proxy by the scheme of the *requested* URL, not of
+        # the proxy URL. Registering the proxy for both schemes makes sure an
+        # "http://..." proxy is actually used for this https request.
+        proxies = {"http": proxy, "https": proxy} if proxy else None
+
+        response = get(google_url, headers=usr_agent, proxies=proxies)
+        response.raise_for_status()
+
+        return response.text
+
+    def parse_results(raw_html):
+        soup = BeautifulSoup(raw_html, 'html.parser')
+        # Each organic result is rendered inside <div class="g">.
+        for result in soup.find_all('div', attrs={'class': 'g'}):
+            link = result.find('a', href=True)
+            title = result.find('h3')
+            if link and title:
+                yield link['href']
+
+    html = fetch_results(term, num_results, lang)
+    return list(parse_results(html))
+
+search('test')

+ 2 - 0
hhh/SEO/hhh_666_edges.py

@@ -93,9 +93,11 @@ def re_get_webdriver():
         driver = webdriver.Remote(
             command_executor='http://127.0.0.1:6666/wd/hub')
         driver.set_window_size(1400,1000)
+        return
     except:
         driver=None
         return None
+    driver=None
 
 def run_once(url):
     global driver

+ 6 - 1
hhh/SEO/hhh_777_edges.py

@@ -1,5 +1,6 @@
 #import redis
 import time
+import traceback
 #import json
 from selenium import webdriver
 from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@@ -87,7 +88,8 @@ def re_get_webdriver():
             ls[0].restart()
 
     time.sleep(4)
-
+#    if driver is not None:
+#        driver.close()
 #    options = webdriver.EdgeOptions()
     try:
         driver = webdriver.Remote(
@@ -95,6 +97,8 @@ def re_get_webdriver():
         driver.set_window_size(1400,1000)
         return
     except:
+        import traceback
+        traceback.print_exc()
         driver=None
         return None
     driver=None
@@ -105,6 +109,7 @@ def run_once(url):
     if i<=3 or driver is None:
 #    if True:
         re_get_webdriver()
+        time.sleep(3)
     if driver is None:
         return
     try:

+ 0 - 2
hhh/SEO/hhh_clickjob.py

@@ -59,8 +59,6 @@ def process_query(qs):
             webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
     time.sleep(2)
 
-
-
     elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
     idx=1
     ranking=-1

+ 34 - 0
hhh/SEO/hhh_phantom2.py

@@ -0,0 +1,34 @@
+from selenium import webdriver
+import time
+import random
+import codecs
+
+
+
+# urls.csv is looked up next to this script.
+fpath=__file__
+fpath=fpath.replace('hhh_phantom2.py','urls.csv')
+
+print(fpath)
+
+# Build absolute URLs from the first CSV column of every non-empty line.
+lst=[]
+with codecs.open(fpath,'r','utf-8') as fr:
+    for line in fr:
+        # strip() removes the trailing newline that a single-column line
+        # would otherwise carry into the URL.
+        path=line.split(',')[0].strip()
+        if path:
+            lst.append('https://www.hhh.com.tw'+path)
+
+#lst=['13781','3649','2116']
+for i in range(9999):
+    driver = webdriver.PhantomJS()
+    try:
+        driver.set_window_size(1120, 550)
+        l=random.choice(lst)
+#        driver.get("https://www.hhh.com.tw/cases/detail/"+l+"/index.php")
+        driver.get(l)
+        print(driver.current_url)
+#        driver.implicitly_wait (3)
+        time.sleep(4)
+        driver.save_screenshot('c:/tmp/test.png')
+    finally:
+        # Always end the PhantomJS process, even when the page load or the
+        # screenshot fails, so failed iterations do not leak processes.
+        driver.quit()
+
+

+ 17 - 0
hhh/SEO/loop_win.py

@@ -0,0 +1,17 @@
+import os
+import subprocess
+import time
+
+from multiprocessing import Process
+
+def f(name):
+    """Run `python hhh_phantom.py` through the system shell.
+
+    Args:
+        name: Accepted but not used.
+    """
+    os.system('python hhh_phantom.py')
+
+if __name__ == '__main__':
+    # Start 10 worker processes that each run f(); none of them is joined here.
+    for i in range(10):
+        p = Process(target=f, args=('bob',))
+        p.start()
+#        p.
+#        p.join()
+#    p.join()
+

+ 23 - 0
hhh/SEO/save.js

@@ -0,0 +1,23 @@
+// Usage: phantomjs save.js <url> <destination>
+// Loads <url> after a 20 s delay, prints its HTML and writes it to <destination>.
+var system = require('system');
+var fs = require('fs');
+var page = require('webpage').create();
+
+var url = system.args[1];
+var destination = system.args[2];
+
+// Give up on any single resource after 10 s.
+page.settings.resourceTimeout = 10000;
+
+// The configured `page` is used for the load. Previously a second page object
+// created inside the callback shadowed it, so resourceTimeout never applied.
+// A one-shot timer keeps the 20 s delay without firing again while a slow
+// load is still pending.
+setTimeout(function () {
+    page.open(url, function () {
+        console.log(page.content);
+        try {
+            fs.write(destination, page.content, 'w');
+        } catch (e) {
+            console.log(e);
+        }
+        phantom.exit();
+    });
+}, 20000);

+ 135 - 0
hhh/SEO/seo_find_rank.py

@@ -0,0 +1,135 @@
+#import redis
+import time
+#import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+import dataset
+import codecs
+import random
+import requests
+import time
+import sys
+import docker
+import codecs
+import random
+import os
+import time
+
+
+# Shared WebDriver session; assigned by re_get_webdriver().
+driver=None
+# Request headers for the LINE Notify API call in send_msg().
+# NOTE(review): the bearer token is hard-coded in source - consider loading it from configuration and rotating it.
+headers = {
+        "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+
+
+def send_msg(kw):
+    """Post a notification that contains keyword `kw` to LINE Notify.
+
+    The request uses the module-level `headers`; the response is ignored.
+    """
+    requests.post(
+        "https://notify-api.line.me/api/notify",
+        headers=headers,
+        params={"message": "處理關鍵字: "+kw},
+    )
+
+
+def empty_query(q):
+    """Open the Google results page for `q` in the shared driver, then wait 3 seconds."""
+    global driver
+    driver.get('https://www.google.com/search?q='+urllib.parse.quote(q))
+    time.sleep(3)
+
+
+def process_query(qs,domain):
+    """Find which Google results page first links to `domain` for query `qs`.
+
+    Loads the search in the shared driver and scans up to 10 result pages,
+    clicking the "next" link between pages.
+
+    Args:
+        qs: Search query string.
+        domain: Substring looked for in each result link's href.
+    Returns:
+        The 0-based index of the first page holding a matching link, or None
+        when none of the 10 scanned pages has one.
+    Raises:
+        Whatever driver.find_element_by_xpath raises when a page has no
+        "next" link (element id 'pnnext').
+    """
+    print('processing...')
+    print(qs)
+    global driver
+    googleurl='https://www.google.com/search?q='+urllib.parse.quote(qs)
+    driver.get(googleurl)
+    time.sleep(3)
+    for i in range(10):
+        elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
+        # Kept so the console output stays the same as before.
+        print("idx=1")
+        for elmt in elmts:
+            href=elmt.get_attribute('href')
+            # Only links whose visible text is longer than 10 characters are
+            # considered; links without an href are skipped.
+            if len(elmt.text)>10 and href and domain in href:
+                print(domain+" in "+ str(href)+" i:"+str(i))
+                return i
+
+        elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
+        webdriver.ActionChains(driver).move_to_element(elmt).perform()
+        webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+        # Give the next page time to load before it is scanned. The pause
+        # used to sit after the loop, where it had no effect on the scan.
+        time.sleep(2)
+
+
+
+
+def re_get_webdriver():
+    """Restart the Selenium container(s) and open a fresh remote Chrome session.
+
+    Every running Docker container whose name contains 'p4444' is restarted,
+    then a remote WebDriver session is opened on port 4444 and stored in the
+    module-level `driver`. On failure `driver` is set to None.
+
+    Returns:
+        None in every case; callers check the module-level `driver`.
+    """
+    global driver
+    client = docker.from_env()
+    ls=client.containers.list()
+    print(ls)
+    for l in ls:
+        print(l.name)
+        if 'p4444' in l.name:
+            l.restart()
+            print('restarted')
+            # Wait for the restarted container to come back up.
+            time.sleep(6)
+        else:
+            print('name not correct')
+
+#    options = webdriver.EdgeOptions()
+    try:
+        print('trying 4444....')
+        options = webdriver.ChromeOptions()
+        driver = webdriver.Remote(
+            command_executor='http://127.0.0.1:4444/wd/hub',desired_capabilities=options.to_capabilities())
+        print('4444 done')
+        driver.set_window_size(1400,1000)
+        print('driver is fine')
+    except Exception:
+        # Catch Exception rather than everything, so Ctrl-C and SystemExit
+        # still propagate, and show why the session could not be created.
+        import traceback
+        print('driver except')
+        traceback.print_exc()
+        driver=None
+
+
+
+
+
+
+def run_once(url,domain):
+    """Open a fresh browser session and look up the result page for one query.
+
+    Args:
+        url: Search query passed on to process_query (not a URL, despite the name).
+        domain: Domain substring to look for in the result links.
+    Returns:
+        The value returned by process_query, or None when no driver could be
+        created or process_query raised.
+    """
+    global driver
+    re_get_webdriver()
+    if driver is None:
+        print('driver is none')
+        return
+    try:
+        return process_query(url,domain)
+    except Exception:
+        # Keep the best-effort behaviour (return None) but show the cause.
+        import traceback
+        print('process_query exception')
+        traceback.print_exc()
+    finally:
+        # End the remote session so it does not linger on the Selenium server.
+        try:
+            driver.quit()
+        except Exception:
+            print('driver quit failed')
+        driver=None
+
+# Keyword to search for and the domain to look for in the results.
+kw='ai行銷 choozmo'
+domain='choozmo.com'
+rank=run_once( kw,domain )
+# Store a click job only when a result page was found.
+if rank is not None:
+    # NOTE(review): the credentials are hard-coded in this URL - consider moving them to configuration.
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
+    table=db['seo_clickjobs']
+    # The message says "upserting", but the active statement below is a plain insert.
+    print('upserting....')
+#    table.insert({'kw':kw,'domain':domain,'category':'choozmo','page':rank},keys=['kw'])
+    table.insert({'kw':kw,'domain':domain,'category':'choozmo','page':rank})
+
+    db.commit()
+
+print(rank)
+time.sleep(0.001)

+ 144 - 0
hhh/SEO/test_fb_live.py

@@ -0,0 +1,144 @@
+#import redis
+import time
+import traceback
+#import json
+from selenium import webdriver
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+import time
+import os
+import urllib.parse
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support import expected_conditions as EC
+
+import codecs
+import random
+import requests
+import time
+import sys
+import docker
+import codecs
+import random
+import os
+import time
+
+
+# Shared WebDriver session; assigned by re_get_webdriver().
+driver=None
+# Request headers for the LINE Notify API call in send_msg().
+# NOTE(review): the bearer token is hard-coded in source - consider loading it from configuration and rotating it.
+headers = {
+        "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
+        "Content-Type": "application/x-www-form-urlencoded"
+}
+
+
+
+def send_msg(kw):
+    """Post a notification that contains keyword `kw` to LINE Notify.
+
+    The request uses the module-level `headers`; the response is ignored.
+    """
+    requests.post(
+        "https://notify-api.line.me/api/notify",
+        headers=headers,
+        params={"message": "處理關鍵字: "+kw},
+    )
+
+
+def empty_query(q):
+    """Open the Google results page for `q` in the shared driver, then wait 3 seconds."""
+    global driver
+    driver.get('https://www.google.com/search?q='+urllib.parse.quote(q))
+    time.sleep(3)
+
+
+def process_query(qs):
+    """Search Google for a keyword and click the first result on the target domain.
+
+    Args:
+        qs: Indexable job record: qs[0] is the query, qs[1] the number of
+            times to follow the "next" link first, qs[2] the domain substring
+            to look for in result links.
+    """
+    keyword=qs[0]
+    domain=qs[2]
+    pages=qs[1]
+    global driver
+    driver.get('https://www.google.com/search?q='+urllib.parse.quote(keyword))
+    time.sleep(3)
+    # Advance to the requested results page.
+    if pages > 0:
+        for _ in range(pages):
+            next_link=driver.find_element_by_xpath("//a[@id='pnnext']")
+            webdriver.ActionChains(driver).move_to_element(next_link).perform()
+            webdriver.ActionChains(driver).move_to_element(next_link).click().perform()
+    time.sleep(3)
+
+    links=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
+    for link in links:
+        href=link.get_attribute('href')
+        label=link.text
+        # Links with 10 or fewer characters of visible text are ignored.
+        if len(label)<=10:
+            continue
+        if domain not in href:
+            continue
+        # First match: hover, click, and stop.
+        webdriver.ActionChains(driver).move_to_element(link).perform()
+        webdriver.ActionChains(driver).move_to_element(link).click().perform()
+        break
+
+
+def re_get_webdriver():
+    """Replace the shared remote WebDriver session with a fresh one on port 17777.
+
+    Any existing session is quit first. On failure to create the new session
+    the module-level `driver` is set to None.
+
+    Returns:
+        None in every case; callers check the module-level `driver`.
+    """
+    global driver
+    if driver is not None:
+        print('closing....')
+        try:
+            driver.quit()
+        except Exception:
+            # The old session may already be dead. Failing here would stop a
+            # replacement session from ever being created.
+            traceback.print_exc()
+#    options = webdriver.EdgeOptions()
+    try:
+        driver = webdriver.Remote(
+            command_executor='http://127.0.0.1:17777/wd/hub')
+        driver.set_window_size(1400,1000)
+    except Exception:
+        # Catch Exception rather than everything, so Ctrl-C still stops the script.
+        traceback.print_exc()
+        driver=None
+
+def run_once(url):
+    """Open a page in a new browser tab and scroll the current window down.
+
+    Roughly 4 times out of 31 (random draw <= 3), or whenever there is no
+    driver, the WebDriver session is recreated first.
+
+    Args:
+        url: Page to open. When empty, the default Facebook video URL is
+            used. Previously the argument was overwritten unconditionally.
+    """
+    global driver
+    i=random.randint(0,30)
+    if i<=3 or driver is None:
+#    if True:
+        re_get_webdriver()
+        time.sleep(3)
+    if driver is None:
+        return
+    url=url or 'https://www.facebook.com/ntdtv.com.tw/videos/399997895164699'
+    try:
+        # Pass the URL as a script argument instead of splicing it into the
+        # JavaScript source, so quotes in the URL cannot break the script.
+        driver.execute_script('window.open(arguments[0],"_blank");', url)
+        driver.execute_script("window.scrollTo(0, window.scrollY + 400)")
+        time.sleep(2)
+    except Exception:
+        print('exception')
+        traceback.print_exc()
+
+
+# Restart the Selenium container(s) whose name contains 'p17777' before starting.
+client = docker.from_env()
+ls=client.containers.list()
+print(ls)
+for l in ls:
+    if 'p17777' in l.name:
+        # Restart the container that matched. ls[0] is only the first
+        # container listed and may be an unrelated one.
+        l.restart()
+time.sleep(4)
+
+
+#lst=['https://www.hhh.com.tw/columns/detail/3427/index.php']
+    #for i in range(20):
+# A single empty entry: run_once() then uses its default URL.
+lst=['']
+while True:
+    l=random.choice(lst)
+    print(l)
+#    driver.get(l)
+    run_once(l)
+#    time.sleep(2)
+#    time.sleep(2)
+
+

+ 10 - 0
hhh/SEO/win_links.py

@@ -0,0 +1,10 @@
+import os
+import time
+# Raw string: the path contains \P, \L and \l, which are invalid escape
+# sequences in a normal string literal (a warning today, an error in a
+# future Python). The resulting command text is unchanged.
+LINKS_EXE = r'"C:\Program Files\Links\links.exe"'
+
+# Fetch the two article pages in turn, 999 times, pausing 4 s after each fetch.
+for i in range(999):
+#    os.system(LINKS_EXE + ' https://www.hhh.com.tw')
+    os.system(LINKS_EXE + ' -no-g -source https://www.hhh.com.tw/columns/detail/1156/index.php')
+
+    time.sleep(4)
+    os.system(LINKS_EXE + ' -no-g -source  https://www.hhh.com.tw/columns/detail/10000/index.php')
+    time.sleep(4)
+

+ 4 - 2
hhh/hhh_fetch_gap20.py

@@ -19,7 +19,7 @@ import dataset
 
 db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
 
-table=db['gap20v5']
+table=db['gap20v6']
 def find_master_by_designer(masters,designer):
     for m in masters:
         if m['designer']==designer:
@@ -214,8 +214,10 @@ qlist=[]
 #get_designer_statistics("無印 風 客廳 site:hhh.com.tw")
 #get_designer_statistics("牆 面 裝飾 site:hhh.com.tw")
 #get_designer_statistics("臥榻 櫃 site:hhh.com.tw")
-get_designer_statistics("工業 風 辦公室 site:hhh.com.tw")
+#get_designer_statistics("工業 風 辦公室 site:hhh.com.tw")
 
+#get_designer_statistics("小坪 數 浴室 乾 濕 分離 site:hhh.com.tw")
+get_designer_statistics("電視 牆 收納 櫃 site:hhh.com.tw")
 
 
 

+ 2 - 2
hhh/reports/content_gap_ranking.py

@@ -33,7 +33,7 @@ def save_sheet(df,filename,tabname,startpos='A1'):
 def do_jobs():
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/hhh?charset=utf8mb4')
 #    cursor=db.query('SELECT distinct kw,if(ranking IS NULL ,"未上榜",ranking ) as r   FROM hhh.hhh_contentgap_serp where id >=155 order by r asc;')
-    cursor=db.query('SELECT distinct kw,if(ranking IS NULL ,"未上榜",ranking ) as r   FROM hhh.hhh_contentgap_serp where id >=294 order by r asc;')
+    cursor=db.query('SELECT distinct kw,if(ranking IS NULL ,"未上榜",ranking ) as r   FROM hhh.hhh_contentgap_serp where id >=394 order by r asc;')
 
     df = pd.DataFrame(columns=('kw','r'))
 
@@ -42,7 +42,7 @@ def do_jobs():
     for c in cursor:
         df.loc[idx]=[c['kw'],c['r']]
         idx+=1
-    save_sheet(df,'ContentGap_Ranking','ranking_20211022')
+    save_sheet(df,'ContentGap_Ranking','ranking_20211105')
 
 t = threading.Thread(target = do_jobs)
 t.start()

+ 2 - 2
urlshortener/main.py

@@ -27,8 +27,8 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-@app.post("/seturl")
-async def get_domain(domain_name):
+@app.post("/seturl/{full_url}")
+async def get_domain(full_url):
     url = "https://similarweb2.p.rapidapi.com/pageoverview"
 
     if 'http' not in domain_name:

Some files were not shown because too many files changed in this diff