# parseutils.py
  1. # -*- coding: utf-8 -*-
  2. DB_NAME = 'google_poi'
  3. SHOP_LIST_TABLE = 'shop_list'
  4. SHOP_LIST_TABLE_COL = ['name', 'lon', 'lat', 'city', 'area',
  5. 'rating', 'user_ratings_total', 'category', 'price_level',
  6. 'addr', 'tel', 'services', 'products', 'choices',
  7. 'facilities', 'groups', 'plans', 'payments', 'safeties',
  8. 'specials', 'barrierlevels', 'items',
  9. 'open_now', 'periods', 'weekday_text', 'reviews',
  10. 'google_url', 'crawler_date']
  11. element_list = {
  12. 'category': ['button', {'jsaction':'pane.rating.category'}],
  13. 'rating': ['ol', {}, 'aria-label'],
  14. 'user_ratings_total': ['span', {'jsaction':'pane.rating.moreReviews'}],
  15. 'price_level':['span', {'jsan':'0.aria-label'}]
  16. }
  17. intro_list = {
  18. '服務選項': ['services','service'],
  19. '產品/服務': ['products','product'],
  20. '用餐選擇': ['choices','choice'],
  21. '設施': ['facilities','facility'],
  22. '客層族群':['groups','group'],
  23. '規劃':['plans','plan'],
  24. '付款方式':['payments','payment'],
  25. '健康與安全':['safeties','safety'],
  26. '特色':['specials','special'],
  27. '無障礙程度':['barrierlevels','barrierlevel'],
  28. '詳細資料':['items','item'],
  29. }
  30. week_list = {
  31. '星期日': 0,
  32. '星期一': 1,
  33. '星期二': 2,
  34. '星期三': 3,
  35. '星期四': 4,
  36. '星期五': 5,
  37. '星期六': 6,
  38. }
  39. def blank_check(value):
  40. while value.startswith(' '):
  41. value = value[1:]
  42. while value.endswith(' '):
  43. value = value[:-1]
  44. return value
  45. def value_check(key, value):
  46. value = blank_check(value)
  47. if key == 'rating':
  48. value = float(value.replace(' 星級',''))
  49. elif key == 'user_ratings_total':
  50. value = int(value.replace(' 則評論','').replace(',',''))
  51. elif key == 'price_level':
  52. value = len(['$' for i in value if i == '$'])
  53. if value == 0:
  54. value = ''
  55. else:
  56. value = int(value)
  57. return value