Your Name 3 lat temu
rodzic
commit
66faad6f49

+ 1 - 0
README.md

@@ -0,0 +1 @@
+## test

+ 106 - 0
hhh/auto_clicks.py

@@ -0,0 +1,106 @@
+from selenium import webdriver
+import time
+import networkx as nx
+import dataset
+import pickle
+import codecs
+from selenium.webdriver.common import keys
+from selenium.webdriver.common.keys import Keys
+import sys
+import os
+import time
+import random
+import re
+import pandas as pd
+from browser_common import JBrowser
+import datetime
+import dataset
+
+
+def get_designer_statistics(designer_list):
+    """Google each designer name and click the first hhh.com.tw result found.
+
+    For every name in ``designer_list`` the Google result page is opened in
+    a ``JBrowser`` session (profile path "Profile 7").  Result links whose
+    visible text is longer than 10 characters are examined in page order:
+
+    * if the link URL contains ``hhh.com.tw`` the link is clicked, the
+      function waits 5 seconds and returns ``None`` immediately;
+    * otherwise the URL and text are printed and recorded together with a
+      1-based position counter.
+
+    The browser is always closed before returning, including when no
+    hhh.com.tw link was found or an exception is raised, so repeated calls
+    do not pile up browser sessions.
+
+    Args:
+        designer_list: iterable of search strings (designer names).
+
+    Returns:
+        ``None`` when an hhh.com.tw link was clicked; otherwise
+        ``{'masters': [], 'details': [...]}`` where every detail is a dict
+        with the keys ``designer``, ``title``, ``url`` and ``ranking``.
+        ``masters`` is never filled in by this function.
+    """
+    jb=JBrowser()
+    jb.set_profile_path("Profile 7")
+
+    details=[]
+    masters=[]
+    driver=None
+
+    try:
+        for q in designer_list:
+            jb.get('https://www.google.com/search?q='+q)
+
+            driver=jb.get_driver()
+            time.sleep(3)
+
+            elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
+            idx=1
+            for elmt in elmts:
+                href=elmt.get_attribute('href')
+                txt=elmt.text
+                # Links with short visible text are skipped and not counted.
+                if len(txt)<=10:
+                    continue
+
+                if 'hhh.com.tw' in href:
+                    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+                    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+                    time.sleep(5)
+                    # The browser is closed by the ``finally`` clause below.
+                    return None
+
+                print(href)
+                print(txt)
+                details.append({'designer':q,'title':txt,'url':href,'ranking':idx})
+                idx+=1
+
+            time.sleep(3)
+
+        return {'masters':masters,'details':details}
+    finally:
+        # ``driver`` stays None when ``designer_list`` is empty or the first
+        # page load failed before a driver was obtained.
+        if driver is not None:
+            driver.quit()
+
+def find_master_by_designer(masters,designer):
+    """Return the first entry of ``masters`` whose 'designer' equals ``designer``.
+
+    Entries are checked in order; ``None`` is returned when nothing matches.
+    """
+    matching=(entry for entry in masters if entry['designer']==designer)
+    return next(matching,None)
+
+# 寓子設計
+#qlist=['元均制作']
+#qlist=['三宅一秀']
+#qlist=['采品室內設計']
+#qlist=['寓子設計']
+#qlist=['綵韻室內設計','春雨時尚空間','阿曼空間設計','雅典設計','境庭國際設計']
+#qlist=['豐聚室內裝修','張馨室內設計','尚藝室內裝修','富億空間設計','比沙列室內裝修']
+#qlist=['森境王俊宏設計','格綸設計','齊舍設計','采舍空間設計','大琚空間設計']
+#qlist=['將作空間','昱承室內裝修','YHS DESIGN','德本迪室內設計','東風室內設計']
+#qlist=['陶璽空間設計','惹雅國際設計','浩室設計','藝谷空間設計','IS國際設計']
+#qlist=['摩登雅舍室內','星葉室內裝修','浩室設計','演拓空間','千綵胤空間']
+#qlist=['京璽國際','元典設計','朱英凱室內設計','亞維空間設計','馥築時尚設計']
+#qlist=['文儀室內裝修','寓子設計','恆岳空間設計','卓林室內設計','歐德傢俱']
+#qlist=['大久空間設計','成綺空間設計','知域設計','尚展空間設計','演繹動線空間']
+#qlist=['苡希創意設計','玖柞設計','維耕設計','昱森室內設計','上築空間設計']
+#qlist=['HATCH合砌設計','至文室內裝修','上陽設計','禾禾設計','聯寬室內裝修']
+
+#設計
+#qlist=['三宅一秀','萬寶隆空間設計','含仰空間設計','元均制作','承炫裝修']
+
+qlist=['春雨時尚空間','萬寶隆空間設計','含仰空間設計','綵韻室內設計','雅典設計','境庭國際設計','張馨室內設計','尚藝室內裝修','豐聚室內裝修','阿曼空間設計','尚藝室內裝修','富億空間設計','齊舍設計','大琚空間設計','將作空間','昱承室內裝修','YHS DESIGN','德本迪室內設計','森境王俊宏設計','綵韻室內設計','寓子設計','東風室內設計','陶璽空間設計','惹雅國際設計','藝谷空間設計']
+
+# Endlessly pick a random designer from qlist, run the search for it and
+# print whatever get_designer_statistics returns.  The loop never ends on
+# its own (stop the script manually), so nothing placed after it would run.
+while True:
+    q=random.choice(qlist)
+    print(q)
+    results=get_designer_statistics([q])
+    print(results)

+ 6 - 0
hhh/batch_fetch.py

@@ -0,0 +1,6 @@
+import os
+import sys
+
+# Switch to the hhh tool directory, then launch the rank-fetching script
+# 30 times, one run after another.
+SCRIPT_DIR='C:\\gitlab\\kw_tools\\kw_tools\\hhh'
+RUN_COUNT=30
+
+os.chdir(SCRIPT_DIR)
+runs_done=0
+while runs_done<RUN_COUNT:
+    os.system('python gspace_fetch_ranks.py')
+    runs_done+=1

+ 30 - 0
hhh/gsc_collect.py

@@ -0,0 +1,30 @@
+import traceback
+import copy
+import operator
+import codecs
+import sys
+import os
+import searchconsole
+
+# Authenticate with the local client-secret / credentials files and select
+# the https://hhh.com.tw/ property.
+account = searchconsole.authenticate(client_config='c:/keys/service/client_secret.json',credentials='c:/keys/service/credentials.json')
+webproperty = account['https://hhh.com.tw/']
+
+
+# NOTE(review): the second date ('2020-05-04') is earlier than the first
+# ('2021-01-01'); this looks like a typo for '2021-05-04' -- confirm before
+# relying on the numbers.
+report=webproperty.query.range('2021-01-01', '2020-05-04').dimension('query').filter('query', '裝修設計', 'contains').get()
+
+# Sum clicks (r[1]) and position (r[4]) over all rows and count the rows.
+total_pos=0
+total_count=0
+total_clicks=0
+for r in report:
+    print(r)
+    total_clicks+=int(r[1])
+    total_pos+=float(r[4])
+    total_count+=1
+
+# Average position; guard against an empty report, which would otherwise
+# raise ZeroDivisionError.
+if total_count:
+    print(total_pos/total_count)
+else:
+    print('no rows returned')
+print(total_clicks)

+ 23 - 7
hhh/gsc_test.py

@@ -5,21 +5,37 @@ import codecs
 import sys
 import os
 import searchconsole
+import dataset
+import datetime
+import random
+
+db = dataset.connect('mysql://choozmo:pAssw0rd@139.162.121.30:33306/hhh?charset=utf8mb4')
+table=db['monitor_kw_raw']
+
 
 account = searchconsole.authenticate(client_config='c:/keys/service/client_secret.json',credentials='c:/keys/service/credentials.json')
 webproperty = account['https://hhh.com.tw/']
 
+allkw=['裝修設計','裝潢','老屋翻新','客廳','玄關設計','臥室','北歐風','廚房','裝潢費用','玄關','廚房收納','裝修','客廳擺設','高雄室內設計','室內設計','小坪數','設計師','廚房設計','客廳裝潢','客廳設計','裝潢設計']
+# SELECT keyword,max(dt) FROM hhh.monitor_kw_raw group by keyword;
 
+kw=random.choice(allkw)
 #report=webproperty.query.range('2021-01-01', '2020-01-02').dimension('page','query').get()
-report=webproperty.query.range('2021-01-01', '2020-04-01').dimension('query').filter('query', '裝修設計', 'contains').get()
+#kw='裝修設計'
+
+#for kw in all
+report=webproperty.query.range('2021-01-01', '2020-05-06').dimension('query').filter('query',kw , 'contains').get()
+data=[]
 for r in report:
     print(r)
-    print(r[1])
-    print(r[4])
 
-#    print(r['clicks'])
-#    print(r['position'])
+    query=r[0]
+    clicks=r[1]
+    impressions=r[2]
+    ctr=r[3]
+    position=r[4]
+    data.append({'keyword':kw,'query':r[0],'clicks':r[1],'impressions':r[2],'ctr':ctr,'dt':datetime.datetime.now() })
 
-#for r in report.rows:
-#    print(r)
 
+for r in data:
+    table.insert(r)

+ 6 - 1
hhh/gspace_fetch_ranks.py

@@ -16,7 +16,11 @@ from browser_common import JBrowser
 import datetime
 import dataset
 
-db = dataset.connect('sqlite:///c:/tmp/hhh.db')
+db = dataset.connect('mysql://choozmo:pAssw0rd@139.162.121.30:33306/hhh?charset=utf8mb4')
+table=db['monitor_kw_raw']
+
+##db = dataset.connect('sqlite:///c:/tmp/hhh.db')
+#db = dataset.connect('sqlite:///c:/tmp/hhh.db')
 
 def get_designer_statistics(designer_list):
     jb=JBrowser()
@@ -74,6 +78,7 @@ def find_master_by_designer(masters,designer):
         if m['designer']==designer:
             return m
 
+# 寓子設計
 #qlist=['元均制作']
 #qlist=['三宅一秀']
 #qlist=['采品室內設計']

+ 119 - 0
hhh/gspace_keywords.py

@@ -0,0 +1,119 @@
+from selenium import webdriver
+import time
+import networkx as nx
+import dataset
+import pickle
+import codecs
+from selenium.webdriver.common import keys
+from selenium.webdriver.common.keys import Keys
+import sys
+import os
+import time
+import re
+import jieba
+from browser_common import JBrowser
+import jinja2
+import df2sheet
+import pandas as pd
+
+
+#db = dataset.connect('sqlite:///c:/tmp/gspace.db')
+db = dataset.connect('sqlite:///:memory:')
+jieba.load_userdict('C:\\src\\farmcodes\\jared\\browser\\dict.txt')
+jieba.load_userdict('C:\\tmp\\dict.txt')
+
+table=db['tmp']
+#db.query('delete from ranking')
+#db.query('delete from hhh_ranking')
+jb=JBrowser()
+jb.set_profile_path("Profile 7")
+
+#kw='收納櫃'
+#kw='收納櫃推薦'
+#kw='收納櫃ikea'
+#kw='收納櫃塑膠'
+#kw='收納櫃設計'
+#kw='收納'
+#kw='系統櫃'
+#kw='抽屜'
+#kw='系統櫃推薦'
+#kw='系統櫃價格'
+#kw='系統櫃廠商'
+#kw='系統櫃ptt'
+#kw='系統櫃材質'
+#kw='系統櫃樣式'
+#kw='系統櫃品牌'
+#kw='系統櫃衣櫃尺寸'
+kw='系統櫃板材'
+
+googleurl='https://www.google.com/search?q='+kw
+jb.get(googleurl)
+
+# Process up to three result pages: tokenise the text block around every
+# result link with jieba and store each token as one row of ``table``.
+for i in range(3):
+
+    driver=jb.get_driver()
+    time.sleep(3)
+
+    elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
+    idx=1
+    for elmt in elmts:
+        try:
+            # Three levels above the link: the enclosing result container.
+            elmt2=elmt.find_element_by_xpath("../../../div")
+            fulldesc=elmt2.text
+            print(fulldesc)
+            seg_list = jieba.cut(fulldesc, cut_all=True)
+            for s in seg_list:
+                print(s)
+                table.insert({'kw':s})
+        except Exception:
+            # Best effort: skip a result that cannot be read.  Unlike a bare
+            # ``except:`` this still lets Ctrl+C / SystemExit through.
+            print('except')
+        idx+=1
+
+    # Move on to the next result page; stop early when there is none
+    # instead of crashing before the collected tokens are exported.
+    next_links=driver.find_elements_by_xpath("//a[@id='pnnext']")
+    if not next_links:
+        break
+    elmt=next_links[0]
+    webdriver.ActionChains(driver).move_to_element(elmt).perform()
+    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
+    time.sleep(1)
+
+    
+##    time.sleep(9999)
+#    time.sleep(2)
+
+
+from jinja2 import Environment, FileSystemLoader
+THIS_DIR = os.path.dirname(os.path.abspath(__file__))
+# Template environment rooted at c:/tmp; it is only referenced by the
+# commented-out HTML export at the end of this script.
+j2_env = Environment(loader=FileSystemLoader('c:/tmp'))
+
+
+#'http://icons-for-free.com/free-icons/png/128/1312099.png'
+
+
+# Count how often each token was stored and keep the ones seen more than
+# once, most frequent first.
+data=[]
+df = pd.DataFrame(columns=('keywords','cnt'))
+
+cursor=db.query('select kw as kw,count(*) as cnt from tmp group by kw having count(*)>1 order by count(*) desc')
+for c in cursor:
+    print(c['kw'])
+    print(c['cnt'])
+    data.append({'kw':c['kw'],'cnt':c['cnt']})
+    # Append at the next free row label (0, 1, 2, ...) rather than relying
+    # on the ``idx`` counter left over from the scraping loop.
+    df.loc[len(df)]=[c['kw'],c['cnt']]
+
+# Hand the frequency table to df2sheet together with the searched keyword.
+df2sheet.save_sheet(df,'May-Event',kw,startpos='A1')
+
+
+#output=j2_env.get_template('hhh_kw.tmpl').render(data=data)
+
+#import codecs
+#fw=codecs.open('c:/tmp/hhh_kw.html', 'w','utf-8')
+#fw.write(output)
+#fw.close()

+ 73 - 0
hhh/gtree2.py

@@ -0,0 +1,73 @@
+import suggests
+import networkx as nx
+import pyvis
+import time
+from pyvis.network import Network
+import pickle
+
+
+#kw='覆髓'
+#kw='塗氟'
+#kw='口掃機'
+#kw='牙醫助理'
+#kw='牙材'
+#kw='牙醫師公會'
+#kw='防齲'
+#kw='齒模'
+#kw='金屬牙套'
+#kw='醫療法'
+#kw='牙醫師手冊'
+#kw='貝氏刷牙'
+#kw='牙醫積分'
+#kw='牙醫師'
+#kw='牙醫全聯會'
+#kw='牙醫系'
+#kw='台大牙醫'
+#kw='成大牙醫'
+#kw='陽明牙醫'
+#kw='北醫牙醫'
+#kw='醫學系公費生'
+#kw='醫學系自費生'
+#kw='北醫牙醫'
+
+#kw='牙醫學會'
+#kw='牙醫總額'
+#kw='牙醫健保'
+#kw='文化資產'
+#kw='藝文團體'
+#kw='書房 設計'
+#kw='室內設計'
+#kw='2021風水擺設'
+#kw='電視牆'
+#kw='系統櫃'
+#kw='收納'
+kw='裝修'
+
+#kw='廣告投放策略'
+
+# Build a two-level suggestion graph: the seed keyword is linked to each of
+# its suggestions (source='google'), and every suggestion is linked to its
+# own suggestions.
+s = suggests.suggests.get_suggests(kw, source='google')
+G = nx.Graph()
+
+for sg in s['suggests']:
+    G.add_edge(kw,sg,weight=1)
+
+    print(sg)
+    # Wait one second before each follow-up lookup.
+    time.sleep(1)
+    s2 = suggests.suggests.get_suggests(sg, source='google')
+    for elmt in s2['suggests']:
+        G.add_edge(sg,elmt,weight=1)
+
+
+# Drop edges that connect a term to itself.
+G.remove_edges_from( list(nx.selfloop_edges(G)))
+
+# Persist the graph; the ``with`` block guarantees the file is flushed and
+# closed (the handle was previously never closed).
+with open("gs2.p","wb") as graph_file:
+    pickle.dump(G,graph_file)
+
+# Render the graph as an interactive HTML page.
+pyG = Network(height="750px", width="100%",bgcolor="#333333",font_color="white")
+pyG.from_nx(G)
+pyG.show('gs.html')
+
+
+
+

+ 19 - 0
hhh/my_migrate.py

@@ -0,0 +1,19 @@
+import traceback
+import copy
+import operator
+import codecs
+import sys
+import os
+import searchconsole
+import dataset
+import datetime
+import random
+
+# Print every row of the monitor_kw_raw table in the local SQLite file.
+# The remote MySQL connection and table handle below are commented out,
+# so nothing is written anywhere.
+local_db = dataset.connect('sqlite:///c:/tmp/hhh.db')
+
+#db = dataset.connect('mysql://choozmo:pAssw0rd@139.162.121.30:33306/hhh?charset=utf8mb4')
+#table=db['monitor_kw_raw']
+for row in local_db.query('select * from monitor_kw_raw'):
+    print(row)
+

+ 7 - 0
similar_server/main.py

@@ -31,6 +31,13 @@ app.add_middleware(
 )
 
 
+@app.get("/query/{qq}")
+async def query(qq):
+    """Print the ``qq`` path parameter and echo it back as ``{'query': qq}``."""
+    print(qq)
+    echoed={'query':qq}
+    return JSONResponse(content=echoed)
+
+
+
 
 @app.get("/domain/{domain_name}")
 async def read_item(domain_name):

+ 21 - 0
similar_server/tests/apitest.py

@@ -0,0 +1,21 @@
+import requests
+import json
+# Request the page overview for one site and print its description.
+url = "https://similarweb2.p.rapidapi.com/pageoverview"
+
+querystring = {"website":"https://www.yahoo.com.tw"}
+
+# NOTE(review): this API key is committed in plain text; consider rotating
+# it and loading it from the environment instead.
+headers = {
+    "x-rapidapi-key": "6dd30886e0msh7aefc9a0a794398p1896f2jsn275c45475609",
+    "x-rapidapi-host": "similarweb2.p.rapidapi.com"
+    }
+
+
+
+response = requests.request("GET", url, headers=headers, params=querystring)
+
+# Show the raw body before parsing so that a non-JSON (error) reply is
+# still visible when json.loads fails.
+print(response.text)
+js=json.loads(response.text)
+
+# The text is already a str; no encode/decode round-trip is needed.
+desc=js['siteDescription']
+print(desc)

+ 41 - 0
similar_server/tests/serptest.py

@@ -0,0 +1,41 @@
+import requests
+import urllib.parse
+import sys
+import json
+# Search for one phrase through the google-search3 RapidAPI endpoint and
+# print the title and link of every result that is not on an excluded site.
+q=urllib.parse.quote("幸福空間")
+#q=urllib.parse.quote("設計師")
+
+print(q)
+
+# NOTE(review): this API key is committed in plain text; consider rotating
+# it and loading it from the environment instead.
+headers = {
+    'content-type': "application/json",
+    'x-rapidapi-key': "6dd30886e0msh7aefc9a0a794398p1896f2jsn275c45475609",
+    'x-rapidapi-host': "google-search3.p.rapidapi.com"
+    }
+
+# Put the URL-quoted phrase itself into the request path.  The previous
+# code embedded the string form of a Python dict here instead.
+url = "https://google-search3.p.rapidapi.com/api/v1/search/q="+q+"&cr=countryTW&num=10"
+
+response = requests.request("GET", url, headers=headers)
+
+
+print(response.text)
+js=json.loads(response.text)
+print(js)
+
+# Print each result once, skipping links that contain any excluded name
+# (the old loop only tested 'youtube' and printed every hit three times).
+excludes=['youtube','facebook','twitter']
+for r in js['results']:
+    if not any(ee in r['link'] for ee in excludes):
+        print(r['title'])
+        print(r['link'])
+
+sys.exit()

+ 1 - 1
similar_web/index.html

@@ -168,7 +168,7 @@
       <!--
       <button type="button" class="btn btn-primary btn-block mb-4 ripple-surface">Place order</button>
 -->
-<div id="main" style="width: 600px;height:400px;"></div>
+<div id="main" style="width: 650px;height:400px;"></div>
 
 
 

+ 5 - 0
tools/search_test.py

@@ -0,0 +1,5 @@
+# Get the first 20 hits for: "Breaking Code" WordPress blog
+from googlesearch import search
+
+# Run the search, limited to 20 hits, and print one URL per line.
+hits=search('"Breaking Code" WordPress blog', stop=20)
+for hit_url in hits:
+    print(hit_url)