Your Name 4 lat temu
rodzic
commit
fb0742d86b
3 zmienionych plików z 200 dodań i 0 usunięć
  1. 48 0
      hhh/browser_common.py
  2. 43 0
      hhh/df2sheet.py
  3. 109 0
      hhh/gspace_ranking.py

+ 48 - 0
hhh/browser_common.py

@@ -0,0 +1,48 @@
+from selenium import webdriver
+import time
+import networkx as nx
+import dataset
+import pickle
+import codecs
+from selenium.webdriver.common.keys import Keys
+import sys
+import os
+import time
+import re
+
+class JBrowser:
+    def __init__(self):
+        os.chdir("C:\\Program Files\\Google\\Chrome\\Application\\")
+
+
+    def set_profile_path(self,profilepath):
+        self.profilepath=profilepath
+        option = webdriver.ChromeOptions()
+        option.add_argument('--disable-web-security') 
+        option.add_argument('--allow-running-insecure-content') 
+        option.add_argument("--user-data-dir=C:\\Users\\jared\\AppData\\Local\\Google\\Chrome\\User Data\\"+self.profilepath+"\\")
+        self.option=option
+        driver = webdriver.Chrome(options=option)
+        self.driver=driver
+#        executor_url = driver.command_executor._url
+#        session_id = driver.session_id
+
+    def scrolling(self,pgnum):
+        ub = self.driver.find_element_by_css_selector('body')
+        for i in range(pgnum):
+            ub.send_keys(Keys.PAGE_DOWN)
+            if pgnum>1:
+                time.sleep(0.5)
+
+    def get(self,url):
+        self.driver.get(url)
+
+    def get_driver(self):
+        return self.driver
+
+#jb=JBrowser()
+#jb.set_profile_path("Profile 7")
+
+#jb.get('https://m.facebook.com/ufi/reaction/profile/browser/?ft_ent_identifier=1981894498624379&ref=page_internal')
+
+#time.sleep(3)

+ 43 - 0
hhh/df2sheet.py

@@ -0,0 +1,43 @@
+#!/usr/bin/python3
+import sys
+import codecs
+import traceback
+import requests
+import re
+import pandas as pd
+import random
+import urllib
+import json
+import gspread
+import datetime
+from gspread_pandas import Spread, Client
+from oauth2client.service_account import ServiceAccountCredentials
+import os
+
+def save_sheet(df,filename,tabname,startpos='A1'):
+
+    scope = ['https://spreadsheets.google.com/feeds',
+            'https://www.googleapis.com/auth/drive']
+
+    credentials = ServiceAccountCredentials.from_json_keyfile_name('c:\\keys\\service\\gspread.json', scope)
+    gc = gspread.authorize(credentials)
+    spread = Spread(filename,creds=credentials)
+
+    spread.df_to_sheet(df, index=False, sheet=tabname, start=startpos, replace=False)
+
+def get_sheets(filename):
+
+    scope = ['https://spreadsheets.google.com/feeds',
+            'https://www.googleapis.com/auth/drive']
+
+    credentials = ServiceAccountCredentials.from_json_keyfile_name('c:\\keys\\service\\gspread.json', scope)
+    gc = gspread.authorize(credentials)
+#    files=gc.list_spreadsheet_files()
+#    print(files)
+    spread = Spread(filename,creds=credentials)
+    for ws in spread.sheets:
+        print(ws.title)
+
+get_sheets('designer_ranking')
+#    spread.df_to_sheet(df, index=False, sheet=tabname, start=startpos, replace=False)
+#

+ 109 - 0
hhh/gspace_ranking.py

@@ -0,0 +1,109 @@
+from selenium import webdriver
+import time
+import networkx as nx
+import dataset
+import pickle
+import codecs
+from selenium.webdriver.common import keys
+from selenium.webdriver.common.keys import Keys
+import sys
+import os
+import time
+import re
+import pandas as pd
+import df2sheet
+from browser_common import JBrowser
+import datetime
+
+def get_designer_statistics(designer_list):
+    jb=JBrowser()
+    jb.set_profile_path("Profile 7")
+    #q='萬寶隆空間設計團隊'
+    #q='含仰空間設計'
+    #q='承炫裝修有限公司'
+    #q='寓子設計'
+    #q='水水設計'
+    #q='廣延空間設計'
+    #q='里摩室內裝修設計'
+#    qlist=['三宅一秀','元均制作','采品室內設計','比沙列室內','澤序空間設計','禾築國際設計','恆岳空間設計','優尼客空間設計','摩登雅舍','亞維空間設計','采舍空間設計','雅典設計','IS國際設計','上築空間設計','京璽國際','禾亞國際室內','我思空間設計','雲方室內設計','允庭室內裝修','富億空間設計','格綸設計','浩室設計','豐聚室內設計','唐林建築室內','沛沛國際室內設計','']
+    #q=qlist[0]
+
+    details=[]
+    masters=[]
+    for q in qlist:
+
+        googleurl='https://www.google.com/search?q='+q
+
+        jb.get(googleurl)
+
+        driver=jb.get_driver()
+        time.sleep(3)
+
+
+        elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
+        idx=1
+        ranking=-1
+        for elmt in elmts:
+            href=elmt.get_attribute('href')
+            txt=elmt.text
+            if len(txt)>10:
+                if 'hhh.com.tw' in href:
+                    ranking=idx
+    #                h_table.insert({'designer':q,'title':txt,'url':href,'ranking':idx})
+                    masters.append({'designer':q,'title':txt,'url':href,'ranking':idx})
+
+                print(href)
+                print(txt)
+    #            table.insert({'designer':q,'title':txt,'url':href,'ranking':idx})
+                details.append({'designer':q,'title':txt,'url':href,'ranking':idx})
+
+                idx+=1
+
+        time.sleep(3)
+
+#    print(masters)
+#    print(details)
+    return {'masters':masters,'details':details}
+
+def find_master_by_designer(masters,designer):
+    for m in masters:
+        if m['designer']==designer:
+            return m
+
+#qlist=['元均制作']
+#qlist=['三宅一秀']
+#qlist=['采品室內設計']
+#qlist=['寓子設計']
+#qlist=['綵韻室內設計','春雨時尚空間','阿曼空間設計','雅典設計','境庭國際設計']
+#qlist=['豐聚室內裝修','張馨室內設計','尚藝室內裝修','富億空間設計','比沙列室內裝修']
+#qlist=['森境王俊宏設計','格綸設計','齊舍設計','采舍空間設計','大琚空間設計']
+#qlist=['將作空間','昱承室內裝修','YHS DESIGN','德本迪室內設計','東風室內設計']
+#qlist=['陶璽空間設計','惹雅國際設計','浩室設計','藝谷空間設計','IS國際設計']
+#qlist=['摩登雅舍室內','星葉室內裝修','浩室設計','演拓空間','千綵胤空間']
+#qlist=['京璽國際','元典設計','朱英凱室內設計','亞維空間設計','馥築時尚設計']
+#qlist=['文儀室內裝修','寓子設計','恆岳空間設計','卓林室內設計','歐德傢俱']
+#qlist=['大久空間設計','成綺空間設計','知域設計','尚展空間設計','演繹動線空間']
+#qlist=['苡希創意設計','玖柞設計','維耕設計','昱森室內設計','上築空間設計']
+qlist=['HATCH合砌設計','至文室內裝修','上陽設計','禾禾設計','聯寬室內裝修']
+
+#設計
+#qlist=['三宅一秀','萬寶隆空間設計','含仰空間設計','元均制作','承炫裝修']
+
+
+
+
+results=get_designer_statistics(qlist)
+print(results)
+
+
+for q in qlist:
+    df = pd.DataFrame(columns=('designer','title','url','ranking','幸福空間排名','更新時間'))
+    r=find_master_by_designer(results['masters'],q)
+    idx=0
+    dtstr=datetime.datetime.now().strftime("%Y-%m/%d %H:%M:%S")
+    for d in results['details']:
+        if d['designer']==q:
+            df.loc[idx]=[d['designer'],d['title'],d['url'],d['ranking'],r['ranking'],dtstr]
+            idx+=1
+    df2sheet.save_sheet(df,'designer_ranking',q,startpos='A1')
+