|
@@ -16,13 +16,10 @@ from utility.connect import *
|
|
|
from datetime import datetime
|
|
|
import pandas as pd
|
|
|
import dataset
|
|
|
-import requests
|
|
|
-import time
|
|
|
-import json
|
|
|
-import re
|
|
|
-import sys, os
|
|
|
-import socket
|
|
|
-import brotli
|
|
|
+import requests, random, time, json
|
|
|
+import re, sys, os
|
|
|
+import socket, brotli
|
|
|
+
|
|
|
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
|
import urllib.parse
|
|
|
chrome_window=False
|
|
@@ -280,16 +277,17 @@ def process_web_request(db, driver, area_num, keyword):
|
|
|
|
|
|
|
|
|
def check_area_code(db, kw):
    """Seed the area-code table with one row per area for a new keyword.

    When ``kw`` is truthy and no row of the area-code table carries it yet,
    every ``num`` of the lat/lon table is copied into the area-code table,
    tagged with ``kw`` and ``expand = 0``. A falsy ``kw`` (``None`` or an
    empty string) makes the call a no-op.

    Args:
        db: Database handle whose ``query`` method executes raw SQL and
            yields rows addressable by column name (presumably the object
            returned by ``dataset.connect`` -- confirm against the caller).
        kw: Search keyword to register.

    Returns:
        None. A failing insert is reported with ``traceback.print_exc()``
        and otherwise ignored (best-effort); errors from the initial
        ``select`` propagate to the caller.
    """
    if not kw:
        return

    table_name = '{}.{}'.format(MYSQL_CONFIG['MYSQL_DB'], TABLE_AREACODES)

    # A set gives an O(1) membership test; the comparison stays a plain
    # Python string comparison on the fetched values.
    known_keywords = {
        row['kw']
        for row in db.query('select distinct(kw) from {}'.format(table_name))
    }
    if kw in known_keywords:
        return

    # Table names cannot be bound, so they are still formatted in; the
    # keyword itself is passed as a bind parameter so that quotes or other
    # SQL metacharacters in it can neither break nor alter the statement.
    sql = 'insert into {} (select num, :kw as kw, 0 as expand from {}) '.format(
        table_name, TABLE_LAT_LON)
    try:
        db.query(sql, kw=kw)
    except Exception:
        # Best-effort seeding: report the failure but let the caller go on.
        traceback.print_exc()
|
|
|
|
|
|
|
|
|
def page_down_(driver, time_):
|
|
@@ -339,7 +337,6 @@ def main():
|
|
|
|
|
|
db = dataset.connect('mysql://{}:{}@{}/{}?charset=utf8mb4'.format( MYSQL_CONFIG['MYSQL_USER'],
|
|
|
MYSQL_CONFIG['MYSQL_PASSWORD'], MYSQL_CONFIG['MYSQL_HOST'], MYSQL_CONFIG['MYSQL_DB']))
|
|
|
- iddict = build_cache(db)
|
|
|
store_list_table = db[TABLE_STORE_LIST]
|
|
|
table2 = db[TABLE_PROGRESS_LIST]
|
|
|
|
|
@@ -352,41 +349,50 @@ def main():
|
|
|
|
|
|
print('drvier start...')
|
|
|
driver = brower_start(port)
|
|
|
- # check_area_code(db, globalkw)
|
|
|
- area_num=None
|
|
|
- if len(sys.argv) > 4 :
|
|
|
- repkw = sys.argv[1]
|
|
|
- repnum = sys.argv[2]
|
|
|
- if 'SCAN' in repkw:
|
|
|
- job = scan_job(db, repnum)
|
|
|
- else:
|
|
|
- job = get_next_job(db, repeat=True, repkw=repkw, repnum=repnum)
|
|
|
- else:
|
|
|
+ check_area_code(db, globalkw)
|
|
|
+ for i in range(2):
|
|
|
+ area_num=None
|
|
|
+ # if len(sys.argv) > 4 :
|
|
|
+ # repkw = sys.argv[1]
|
|
|
+ # repnum = sys.argv[2]
|
|
|
+ # if 'SCAN' in repkw:
|
|
|
+ # job = scan_job(db, repnum)
|
|
|
+ # else:
|
|
|
+ # job = get_next_job(db, repeat=True, repkw=repkw, repnum=repnum)
|
|
|
+ # else:
|
|
|
job = get_next_job(db, repkw=globalkw)
|
|
|
- print(job)
|
|
|
- keyword = job['kw']
|
|
|
- latitude = job['lat'] #緯度
|
|
|
- longitude = job['lon'] #精度
|
|
|
- area_num = job['num']
|
|
|
-
|
|
|
- safe_string = urllib.parse.quote_plus(keyword)
|
|
|
- url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
|
|
|
- print(url)
|
|
|
- prev_cnt=0
|
|
|
- cursor = db.query('select count(*) as cnt from {} where num="{}" '.format(TABLE_STORE_LIST, str(area_num)))
|
|
|
- for c in cursor:
|
|
|
- prev_cnt = c['cnt']
|
|
|
- break
|
|
|
- driver.get(url)
|
|
|
- time.sleep(2)
|
|
|
- keyin_keyword(driver, keyword)
|
|
|
- page_down_(driver, 10)
|
|
|
- process_web_request(db, driver, area_num, keyword)
|
|
|
-
|
|
|
-
|
|
|
- table2.insert({'kw':keyword,'num':job['num']},['kw'])
|
|
|
- db.query('update {} set expand = 1 where num="'.format(TABLE_AREACODES)+str(job['num'])+'" and kw="'+keyword+'" ')
|
|
|
-
|
|
|
+ print(job)
|
|
|
+
|
|
|
+ keyword = job['kw']
|
|
|
+ globalkw = keyword
|
|
|
+ latitude = job['lat'] #緯度
|
|
|
+ longitude = job['lon'] #精度
|
|
|
+ area_num = job['num']
|
|
|
+
|
|
|
+ safe_string = urllib.parse.quote_plus(keyword)
|
|
|
+ for j in range(5):
|
|
|
+ iddict = build_cache(db)
|
|
|
+ if j != 0:
|
|
|
+ latitude_ = float(latitude) + (random.randint(-999,999) / 10000)
|
|
|
+ longitude_ = float(longitude) + (random.randint(-999,999) / 10000)
|
|
|
+ else:
|
|
|
+ latitude_, longitude_ = latitude, longitude
|
|
|
+ url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude_, longitude_)
|
|
|
+ print(url)
|
|
|
+ prev_cnt=0
|
|
|
+ cursor = db.query('select count(*) as cnt from {} where num="{}" '.format(TABLE_STORE_LIST, str(area_num)))
|
|
|
+ for c in cursor:
|
|
|
+ prev_cnt = c['cnt']
|
|
|
+ break
|
|
|
+ driver.get(url)
|
|
|
+ time.sleep(2)
|
|
|
+ keyin_keyword(driver, keyword)
|
|
|
+ # page_down_(driver, 3)
|
|
|
+ process_web_request(db, driver, area_num, keyword)
|
|
|
+ time.sleep(1)
|
|
|
+
|
|
|
+ table2.insert({'kw':keyword,'num':job['num']},['kw'])
|
|
|
+ db.query(f'update {TABLE_AREACODES} set expand = 1 where num="'+str(job['num'])+'" and kw="'+keyword+'" ')
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|