noodles 3 роки тому
батько
коміт
cb1ac6f6f2
1 змінених файлів з 62 додано та 0 видалено
  1. 62 0
      get_google_id.py

+ 62 - 0
get_google_id.py

@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+from selenium import webdriver
+from selenium.webdriver.common.action_chains import ActionChains
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
+from selenium.webdriver.common.by import By
+
+from bs4 import BeautifulSoup
+
+from utility import database_access as DA
+from utility.parseutils import *
+from utility.connect import *
+import dataset
+import sys
+from datetime import datetime
+import pandas as pd
+import time
+import json
+import re, os
+
+def brower_start(port):
+    options = webdriver.ChromeOptions()
+#    browser = webdriver.Chrome(options=options)
+
+    browser = webdriver.Remote(
+        command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
+        desired_capabilities=options.to_capabilities()
+    )
+    return browser
+
+
+def main():
+    port=4444
+    if len(sys.argv) >1:
+        port=int(sys.argv[1])
+        print('restart docker p{}'.format(port))
+        os.system('sudo docker container restart p'+str(port))
+        time.sleep(8)
+    print('drvier start...')
+    driver = brower_start(port)
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+    table=db['shop_list2']
+
+    for i in range(1):
+        result = db.query('select * from shop_list2 where google_id is null ORDER BY RAND() limit 20')
+        result = pd.DataFrame([i for i in result])
+
+        for key, group in result.iterrows():
+            unique_id = group['unique_id']
+            item_url = group['item_url']
+            url = url = 'view-source:' + item_url
+            driver.get(url)
+            time.sleep(0.5)
+            sourcetext = driver.page_source
+            google_id = re.findall('null,\\\\"ChIJ[a-zA-Z0-9-_+]*\\\\"', sourcetext)[0].replace('null,','').replace('\\','').replace('"','')
+            print(google_id)
+            table.upsert({'unique_id': unique_id,'google_id':google_id},['unique_id'])
+            
+
+# Run the scraper only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()