parseutils.py 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. # -*- coding: utf-8 -*-
  2. DB_NAME = 'google_poi'
  3. SHOP_LIST_TABLE = 'shop_list3'
  4. SHOP_LIST_TABLE_COL = ['name', 'fid', 'city', 'area',
  5. 'rating', 'user_ratings_total', 'category', 'price_level',
  6. 'addr', 'tel', 'services', 'products', 'choices',
  7. 'facilities', 'groups', 'plans', 'payments', 'safeties',
  8. 'specials', 'barrierlevels', 'items' ,
  9. 'open_now', 'periods', 'weekday_text','reviews',
  10. 'shop_photo','menu_photo',
  11. 'google_url', 'item_url', 'keyword', 'crawler_date']
  12. # SHOP_LIST_TABLE_COL = ['unique_id', 'name', 'lon', 'lat', 'city', 'area',
  13. # 'rating', 'user_ratings_total', 'category', 'price_level',
  14. # 'addr', 'tel', 'services', 'products', 'choices',
  15. # 'facilities', 'groups', 'plans', 'payments', 'safeties',
  16. # 'specials', 'barrierlevels', 'items' ,
  17. # 'open_now', 'periods', 'weekday_text','reviews',
  18. # 'shop_photo','menu_photo',
  19. # 'google_url', 'item_url', 'keyword', 'crawler_date']
  20. element_list = {
  21. 'category': ['button', {'jsaction':'pane.rating.category'}],
  22. 'rating': ['ol', {}, 'aria-label'],
  23. 'user_ratings_total': ['span', {'jsaction':'pane.rating.moreReviews'}],
  24. 'price_level':['span', {'jsan':'0.aria-label'}]
  25. }
  26. intro_list = {
  27. '服務選項': ['services','service'],
  28. '產品/服務': ['products','product'],
  29. '用餐選擇': ['choices','choice'],
  30. '設施': ['facilities','facility'],
  31. '客層族群':['groups','group'],
  32. '規劃':['plans','plan'],
  33. '付款方式':['payments','payment'],
  34. '健康與安全':['safeties','safety'],
  35. '特色':['specials','special'],
  36. '無障礙程度':['barrierlevels','barrierlevel'],
  37. '詳細資料':['items','item']
  38. }
  39. week_list = {
  40. '星期日': 0,
  41. '星期一': 1,
  42. '星期二': 2,
  43. '星期三': 3,
  44. '星期四': 4,
  45. '星期五': 5,
  46. '星期六': 6,
  47. }
  48. def blank_check(value):
  49. while value.startswith(' '):
  50. value = value[1:]
  51. while value.endswith(' '):
  52. value = value[:-1]
  53. return value
  54. def value_check(key, value):
  55. value = blank_check(value)
  56. if key == 'rating':
  57. value = float(value.replace(' 星級',''))
  58. elif key == 'user_ratings_total':
  59. value = int(value.replace(' 則評論','').replace(',',''))
  60. elif key == 'price_level':
  61. value = len(['$' for i in value if i == '$'])
  62. if value == 0:
  63. value = ''
  64. else:
  65. value = int(value)
  66. return value