noodlesloves 3 år sedan
förälder
incheckning
75b31a18bb
5 ändrade filer med 193 tillägg och 0 borttagningar
  1. BIN
      utility/__init__.py
  2. 6 0
      utility/connect.py
  3. 93 0
      utility/googlemapsearch.sql
  4. 37 0
      utility/log.py
  5. 57 0
      utility/parseutils.py

BIN
utility/__init__.py


+ 6 - 0
utility/connect.py

@@ -0,0 +1,6 @@
+MYSQL_CONFIG = {
+    "MYSQL_HOST": "db.ptt.cx",
+    "MYSQL_USER": "choozmo",
+    "MYSQL_PASSWORD": "pAssw0rd",
+    "MYSQL_PORT": 3306
+}

+ 93 - 0
utility/googlemapsearch.sql

@@ -0,0 +1,93 @@
+-- shop_list: one row per shop, identified by a surrogate `id` and made
+-- unique on `google_url`.
+-- NOTE(review): DEFAULT CHARSET=utf8 is MySQL's 3-byte utf8mb3, so 4-byte
+-- characters (e.g. emoji) cannot be stored in the VARCHAR columns; consider
+-- utf8mb4 if shop names or addresses may contain them.
+CREATE TABLE `shop_list` (
+   `id` int NOT NULL AUTO_INCREMENT,
+   `name` VARCHAR(100),
+   `lon` DOUBLE,
+   `lat` DOUBLE,
+   `city` VARCHAR(10),
+   `area` VARCHAR(10),
+   `rating` DOUBLE,
+   -- NOTE(review): declared DOUBLE although this looks like a count; an
+   -- integer type may be intended -- confirm against the loader.
+   `user_ratings_total` DOUBLE,
+   `category` VARCHAR(20),
+   `price_level` VARCHAR(10),
+   `addr` VARCHAR(100),
+   `tel` VARCHAR(20),
+   
+   -- One JSON column per attribute group; the names match the plural keys
+   -- used in utility/parseutils.py `intro_list`.
+   `services` JSON ,
+   `products` JSON ,
+   `choices` JSON ,
+   `facilities` JSON ,
+   `groups` JSON ,
+   `plans` JSON ,
+   `payments` JSON ,
+   `safeties` JSON ,
+   `specials` JSON ,
+   `barrierlevels` JSON ,
+   `items` JSON ,
+
+   -- Opening-hours data.
+   `open_now` VARCHAR(10),
+   `periods` JSON,
+   `weekday_text` JSON,
+   
+   `reviews` JSON,
+
+   `google_url` VARCHAR(200),
+   -- Stored as a 20-character string rather than a DATE/DATETIME type.
+   `crawler_date` char(20) NOT NULL,
+   PRIMARY KEY (`id`),
+   -- `google_url` is nullable; a UNIQUE key still permits multiple NULLs.
+   UNIQUE KEY (`google_url`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+
+-- shop_item_list: one row per `google_url` holding the same attribute
+-- groups as the JSON columns of `shop_list`, here as VARCHAR(100) text.
+-- NOTE(review): presumably a flattened/searchable copy of those JSON
+-- columns -- confirm what the loader writes into each field.
+CREATE TABLE `shop_item_list` (
+   `id` int NOT NULL AUTO_INCREMENT,
+   `google_url` VARCHAR(200),
+   `services` VARCHAR(100),
+   `products` VARCHAR(100),
+   `choices` VARCHAR(100),
+   `facilities` VARCHAR(100),
+   `groups` VARCHAR(100),
+   `plans` VARCHAR(100),
+   `payments` VARCHAR(100),
+   `safeties` VARCHAR(100),
+   `specials` VARCHAR(100),
+   `barrierlevels` VARCHAR(100),
+   `items` VARCHAR(100),
+   -- Stored as a 20-character string rather than a DATE/DATETIME type.
+   `crawler_date` char(20) NOT NULL,
+   PRIMARY KEY (`id`),
+   UNIQUE KEY (`google_url`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+
+-- shop_time_list: opening hours, at most one row per shop (`google_url`)
+-- and `day`, enforced by the composite UNIQUE key.
+CREATE TABLE `shop_time_list` (
+   `id` int NOT NULL AUTO_INCREMENT,
+   `google_url` VARCHAR(200),
+   `open_now` VARCHAR(20),
+   -- NOTE(review): presumably the 0-6 weekday index from
+   -- utility/parseutils.py `week_list` (Sunday = 0) -- confirm.
+   `day` int,
+   -- Opening and closing times are stored as text, not TIME values.
+   `open` VARCHAR(20),
+   `close` VARCHAR(20),
+   `crawler_date` char(20) NOT NULL,
+   PRIMARY KEY (`id`),
+   UNIQUE KEY (`google_url`,`day`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+
+-- shop_reviews_list: one row per review, unique on (`id`, `google_url`).
+-- NOTE(review): unlike the other tables this one declares no PRIMARY KEY,
+-- and both key columns are nullable.
+-- NOTE(review): DEFAULT CHARSET=utf8 is the 3-byte utf8mb3; review text or
+-- author names containing emoji cannot be stored -- consider utf8mb4.
+CREATE TABLE `shop_reviews_list` (
+   `id` VARCHAR(30),
+   `google_url` VARCHAR(200),
+   `author_name` VARCHAR(20),
+   `profile_photo_url` VARCHAR(200),
+   `rating` int,
+   -- Review body is capped at 500 characters.
+   `text` VARCHAR(500),
+   -- Creation time is stored as text, not a DATETIME value.
+   `created_at` VARCHAR(20),
+   `crawler_date` char(20) NOT NULL,
+   UNIQUE KEY (`id`,`google_url`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+
+
+-- shop_reviews_photo_list: one row per `id`, with the shop's `google_url`
+-- and the photo `path`.
+-- NOTE(review): `id` is the PRIMARY KEY, so a table can hold only one row
+-- per id; presumably this is the review id -- confirm whether several
+-- photos per review are expected.
+CREATE TABLE `shop_reviews_photo_list` (
+   `id` VARCHAR(30),
+   `google_url` VARCHAR(200),
+   `path` VARCHAR(200),
+   `crawler_date` char(20) NOT NULL,
+   PRIMARY KEY (`id`),
+   -- Redundant: `id` alone is already unique through the PRIMARY KEY.
+   UNIQUE KEY (`id`,`google_url`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;

+ 37 - 0
utility/log.py

@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+import os
+import sys
+from logging import DEBUG
+from logging import Formatter
+from logging import StreamHandler
+from logging import getLogger
+from logging.handlers import RotatingFileHandler
+
+
+def init_logging(module_name, log_file, level=DEBUG, max_bytes=65536000,
+                 backup_count=3):
+    """get the logger for the module_name and the log_file
+    Args:
+        module_name (str): the module name
+        log_file (str): the log file
+        level:
+        max_bytes: 65536000, 64MB
+        backup_count:
+    Returns:
+        Logger: the logger   
+    """
+    
+    logger = getLogger(module_name)
+    if logger.handlers:
+        return logger
+    logger.setLevel(level)
+    logger.propagate = True
+    handler = RotatingFileHandler(
+        filename=log_file, maxBytes=max_bytes, backupCount=backup_count)
+    formatter = Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    handler = StreamHandler(sys.stdout)
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    return logger

+ 57 - 0
utility/parseutils.py

@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+element_list = {
+    'category': ['button', {'jsaction':'pane.rating.category'}],
+    'rating': ['ol', {}, 'aria-label'],
+    'user_ratings_total': ['span', {'jsaction':'pane.rating.moreReviews'}],
+    'price_level':['span', {'jsan':'0.aria-label'}]
+}
+
+
+intro_list = {
+    '服務選項': ['services','service'],
+    '產品/服務': ['products','product'],
+    '用餐選擇': ['choices','choice'],
+    '設施': ['facilities','facility'],
+    '客層族群':['groups','group'],
+    '規劃':['plans','plan'],
+    '付款方式':['payments','payment'],
+    '健康與安全':['safeties','safety'],
+    '特色':['specials','special'],
+    '無障礙程度':['barrierlevels','barrierlevel'],
+    '詳細資料':['items','item'],
+}
+
+week_list = {
+    '星期日': 0, 
+    '星期一': 1, 
+    '星期二': 2, 
+    '星期三': 3,
+    '星期四': 4, 
+    '星期五': 5, 
+    '星期六': 6,  
+}
+
+
+def blank_check(value):
+    while value.startswith(' '):
+        value = value[1:]
+
+    while value.endswith(' '):
+        value = value[:-1]
+    return value
+
+
+def value_check(key, value):
+    value = blank_check(value)
+    if key == 'rating':
+        value = float(value.replace(' 星級',''))
+    elif key == 'user_ratings_total':
+        value = int(value.replace(' 則評論','').replace(',',''))
+    elif key == 'price_level':
+        value = len(['$' for i in value if i == '$'])
+        if value == 0:
+            value = ''
+        else:
+            value = int(value)
+
+    return value