|  | @@ -0,0 +1,393 @@
 | 
	
		
			
				|  |  | +# -*- coding: utf-8 -*-
 | 
	
		
			
				|  |  | +from seleniumwire import webdriver
 | 
	
		
			
				|  |  | +from selenium.webdriver.common.action_chains import ActionChains
 | 
	
		
			
				|  |  | +from selenium.webdriver.common.keys import Keys
 | 
	
		
			
				|  |  | +from selenium.webdriver.support import expected_conditions as EC
 | 
	
		
			
				|  |  | +from selenium.webdriver.support.wait import WebDriverWait
 | 
	
		
			
				|  |  | +from selenium.webdriver.common.by import By
 | 
	
		
			
				|  |  | +import selenium
 | 
	
		
			
				|  |  | +import traceback
 | 
	
		
			
				|  |  | +from bs4 import BeautifulSoup
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +from utility import database_access as DA
 | 
	
		
			
				|  |  | +from utility.parseutils import *
 | 
	
		
			
				|  |  | +from utility.connect import *
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +from datetime import datetime
 | 
	
		
			
				|  |  | +import pandas as pd
 | 
	
		
			
				|  |  | +import dataset
 | 
	
		
			
				|  |  | +import requests
 | 
	
		
			
				|  |  | +import time
 | 
	
		
			
				|  |  | +import json
 | 
	
		
			
				|  |  | +import re
 | 
	
		
			
				|  |  | +import sys, os
 | 
	
		
			
				|  |  | +import socket
 | 
	
		
			
				|  |  | +import brotli
 | 
	
		
			
				|  |  | +from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 | 
	
		
			
				|  |  | +import urllib.parse
 | 
	
		
			
				|  |  | +chrome_window=False
 | 
	
		
			
				|  |  | +#chrome_window=True
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +globalkw=None
 | 
	
		
			
				|  |  | +proxyport=8787
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def build_cache(db):
 | 
	
		
			
				|  |  | +    id_dict={}
 | 
	
		
			
				|  |  | +    cursor = db.query('SELECT place_id FROM {}.{};'.format(MYSQL_CONFIG['MYSQL_DB'], TABLE_STORE_LIST))
 | 
	
		
			
				|  |  | +    for c in cursor:
 | 
	
		
			
				|  |  | +        id_dict[c['place_id']]=1
 | 
	
		
			
				|  |  | +    return id_dict
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def brower_start(port):
 | 
	
		
			
				|  |  | +    global proxyport
 | 
	
		
			
				|  |  | +    global chrome_window
 | 
	
		
			
				|  |  | +    print(proxyport)
 | 
	
		
			
				|  |  | +    options = webdriver.ChromeOptions()
 | 
	
		
			
				|  |  | +    if chrome_window:
 | 
	
		
			
				|  |  | +        browser = webdriver.Chrome(
 | 
	
		
			
				|  |  | +            desired_capabilities=options.to_capabilities()
 | 
	
		
			
				|  |  | +        )
 | 
	
		
			
				|  |  | +    else:
 | 
	
		
			
				|  |  | +        chrome_options = webdriver.ChromeOptions()
 | 
	
		
			
				|  |  | +        chrome_options.add_argument('--proxy-server=host.docker.internal:'+str(proxyport))  # Specify your Kubernetes service-name here
 | 
	
		
			
				|  |  | +        chrome_options.add_argument('--ignore-certificate-errors')
 | 
	
		
			
				|  |  | +        chrome_options.add_argument("--no-sandbox")
 | 
	
		
			
				|  |  | +        chrome_options.add_argument("--disable-dev-shm-usage")
 | 
	
		
			
				|  |  | +        browser = webdriver.Remote(
 | 
	
		
			
				|  |  | +            command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
 | 
	
		
			
				|  |  | +            desired_capabilities=chrome_options.to_capabilities(),
 | 
	
		
			
				|  |  | +            seleniumwire_options={'addr':'0.0.0.0','port':proxyport,'auto_config': False}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +            )
 | 
	
		
			
				|  |  | +#            seleniumwire_options = {'addr': '172.17.0.2','port':4444})
 | 
	
		
			
				|  |  | +        browser.set_window_size(1400,1000)
 | 
	
		
			
				|  |  | +    return browser
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def keyin_keyword(driver, keyword):
 | 
	
		
			
				|  |  | +    button = driver.find_element_by_id("searchbox")
 | 
	
		
			
				|  |  | +    driver.implicitly_wait(30)
 | 
	
		
			
				|  |  | +    ActionChains(driver).move_to_element(button).send_keys(keyword).send_keys(Keys.RETURN).perform()
 | 
	
		
			
				|  |  | +    time.sleep(3)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def scan_job(db, kw):
 | 
	
		
			
				|  |  | +    result = {'kw' : kw}
 | 
	
		
			
				|  |  | +    table_name = '{}.{}'.format(MYSQL_CONFIG['MYSQL_DB'],MYSQL_CONFIG['TABLE_CONV_LOG'])
 | 
	
		
			
				|  |  | +    cursor = db.query('select t1.num,next-prev as diff from {} t1, \
 | 
	
		
			
				|  |  | +        (SELECT num,max(id) mid  FROM {} group by num  ) t2 \
 | 
	
		
			
				|  |  | +            where t1.id=t2.mid having diff>0 order by rand()'.format(table_name, table_name))
 | 
	
		
			
				|  |  | +    for c in cursor:
 | 
	
		
			
				|  |  | +        result['num']=c['num']
 | 
	
		
			
				|  |  | +        break
 | 
	
		
			
				|  |  | +    cursor = db.query('select lat,lon,loc from {} where num ="'.format(TABLE_LAT_LON)+str(result['num'])+'"')
 | 
	
		
			
				|  |  | +    for c in cursor:
 | 
	
		
			
				|  |  | +        result['lat'] = c['lat']
 | 
	
		
			
				|  |  | +        result['lon'] = c['lon']
 | 
	
		
			
				|  |  | +        result['loc'] = c['loc']
 | 
	
		
			
				|  |  | +        return result
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def get_next_job(db, repeat=False, repkw=None, repnum=None):
 | 
	
		
			
				|  |  | +    global globalkw
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    result={}
 | 
	
		
			
				|  |  | +    cursor = db.query('select kw, num from {} where expand = 0 order by rand()'.format(TABLE_AREACODES))
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    for c in cursor:
 | 
	
		
			
				|  |  | +        if repkw is None:
 | 
	
		
			
				|  |  | +            repkw = c['kw']
 | 
	
		
			
				|  |  | +        result['kw'] = c['kw']
 | 
	
		
			
				|  |  | +        result['num'] = c['num']
 | 
	
		
			
				|  |  | +        break
 | 
	
		
			
				|  |  | +    if repkw is not None:
 | 
	
		
			
				|  |  | +        result['kw'] = repkw
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    if result.get('num') is not None:
 | 
	
		
			
				|  |  | +        cursor = db.query('select lat,lon,loc from {} where num ="{}"'.format(TABLE_LAT_LON, str(result['num'])))
 | 
	
		
			
				|  |  | +        for c in cursor:
 | 
	
		
			
				|  |  | +            result['lat']=c['lat']
 | 
	
		
			
				|  |  | +            result['lon']=c['lon']
 | 
	
		
			
				|  |  | +            result['loc']=c['loc']
 | 
	
		
			
				|  |  | +            break
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    if repeat and repkw!= 'REP':
 | 
	
		
			
				|  |  | +        result['kw']=repkw
 | 
	
		
			
				|  |  | +        result['num']=repnum
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    if 'REP' in repkw:
 | 
	
		
			
				|  |  | +        if repnum=='REP':
 | 
	
		
			
				|  |  | +            repnum=None
 | 
	
		
			
				|  |  | +            cursor = db.query('select num from {} order by rand() limit 1'.format(TABLE_STORE_LIST))
 | 
	
		
			
				|  |  | +            for c in cursor:
 | 
	
		
			
				|  |  | +                repnum=c['num']
 | 
	
		
			
				|  |  | +                break
 | 
	
		
			
				|  |  | +        if repnum is None:
 | 
	
		
			
				|  |  | +            cursor = db.query('select num from {} order by rand() limit 1'.format(TABLE_STORE_LIST))
 | 
	
		
			
				|  |  | +            for c in cursor:
 | 
	
		
			
				|  |  | +                repnum=c['num']
 | 
	
		
			
				|  |  | +                break
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        cursor = db.query('select  lat_txt,lon_txt,keyword,num from {} where num="{}" limit 1'.format(TABLE_STORE_LIST, str(repnum)))
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        for c in cursor:
 | 
	
		
			
				|  |  | +            result['kw']=c['keyword']
 | 
	
		
			
				|  |  | +            result['num']=c['num']
 | 
	
		
			
				|  |  | +            result['lat']=c['lat_txt']
 | 
	
		
			
				|  |  | +            result['lon']=c['lon_txt']
 | 
	
		
			
				|  |  | +            result['loc']=''
 | 
	
		
			
				|  |  | +            return result
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    if repeat:
 | 
	
		
			
				|  |  | +        cursor = db.query('select  lat_txt,lon_txt,keyword from {} order by rand() limit 1'.format(TABLE_STORE_LIST))
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        for c in cursor:
 | 
	
		
			
				|  |  | +            result['kw']=c['keyword']
 | 
	
		
			
				|  |  | +            result['lat']=c['lat_txt']
 | 
	
		
			
				|  |  | +            result['lon']=c['lon_txt']
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    return result
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def write_to_file(jsobj,fname):
 | 
	
		
			
				|  |  | +    import codecs
 | 
	
		
			
				|  |  | +    fw=codecs.open(fname,'w','utf-8')
 | 
	
		
			
				|  |  | +    fw.write(str(jsobj))
 | 
	
		
			
				|  |  | +    fw.close()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def parsing_js(orig):
 | 
	
		
			
				|  |  | +    resultobj=[]
 | 
	
		
			
				|  |  | +    content=""
 | 
	
		
			
				|  |  | +    lines=orig.split('\n')
 | 
	
		
			
				|  |  | +    for l in lines:
 | 
	
		
			
				|  |  | +        newl=l.replace('\\"','"')
 | 
	
		
			
				|  |  | +        newl=newl.replace('\\"','"')
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        content+=newl
 | 
	
		
			
				|  |  | +    result=re.search(r'\[\["',content)
 | 
	
		
			
				|  |  | +    print(result)
 | 
	
		
			
				|  |  | +    content_begin=result.start()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    result=re.search(r'\]\]"',content)
 | 
	
		
			
				|  |  | +    print(result)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    content_end=result.end()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    jscontent=content[content_begin:content_end-1]
 | 
	
		
			
				|  |  | +#    write_to_file(jscontent,'c:/tmp/debug.txt')
 | 
	
		
			
				|  |  | +    jsobj=json.loads(jscontent)
 | 
	
		
			
				|  |  | +    for x in jsobj[0][1][1:]:
 | 
	
		
			
				|  |  | +        print(x[14][11])
 | 
	
		
			
				|  |  | +        print(x[14][9])
 | 
	
		
			
				|  |  | +        reviews_cnt=None
 | 
	
		
			
				|  |  | +        photo=None
 | 
	
		
			
				|  |  | +        rating=None
 | 
	
		
			
				|  |  | +        biz_id=None
 | 
	
		
			
				|  |  | +        loc_x=None
 | 
	
		
			
				|  |  | +        loc_y=None
 | 
	
		
			
				|  |  | +        addr_elmts=None
 | 
	
		
			
				|  |  | +        tel=None
 | 
	
		
			
				|  |  | +        try:
 | 
	
		
			
				|  |  | +            rating=x[14][4][7]
 | 
	
		
			
				|  |  | +            reviews_cnt=x[14][4][8]
 | 
	
		
			
				|  |  | +        except:
 | 
	
		
			
				|  |  | +            traceback.print_exc()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        try:
 | 
	
		
			
				|  |  | +            photo=x[14][37][0][0][0]
 | 
	
		
			
				|  |  | +            num_photos=x[14][37][0][0][6][1]
 | 
	
		
			
				|  |  | +        except:
 | 
	
		
			
				|  |  | +            traceback.print_exc()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        try:
 | 
	
		
			
				|  |  | +            loc_x=x[14][37][0][0][29][0]
 | 
	
		
			
				|  |  | +            loc_y=x[14][37][0][0][29][1]
 | 
	
		
			
				|  |  | +        except:
 | 
	
		
			
				|  |  | +            traceback.print_exc()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        try:
 | 
	
		
			
				|  |  | +            biz_id=x[14][57][2]
 | 
	
		
			
				|  |  | +            tel=x[14][178][0][3]
 | 
	
		
			
				|  |  | +        except:
 | 
	
		
			
				|  |  | +            traceback.print_exc()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        try:
 | 
	
		
			
				|  |  | +            addr_elmts=str(x[14][82])
 | 
	
		
			
				|  |  | +        except:
 | 
	
		
			
				|  |  | +            traceback.print_exc()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        category=str(x[14][13])
 | 
	
		
			
				|  |  | +        topic=str(x[14][89])
 | 
	
		
			
				|  |  | +        print(x[14][13])
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        print(x[14][10])
 | 
	
		
			
				|  |  | +        print(x[14][2])
 | 
	
		
			
				|  |  | +        print(x[14][78])
 | 
	
		
			
				|  |  | +        try:
 | 
	
		
			
				|  |  | +            resultobj.append({'name':x[14][11],'fid':x[14][10],'addr':x[14][2][0],'addr_elmts':addr_elmts,'place_id':x[14][78],'category':category,'rating':rating,'reviews_cnt':reviews_cnt,'lat':x[14][9][2],'lat_txt':str(x[14][9][2]),'lon':x[14][9][3],'lon_txt':str(x[14][9][3]),'topic':topic,'photo':photo,'num_photos':num_photos,'loc_x':loc_x,'loc_y':loc_y,'biz_id':biz_id,'tel':tel,'crawler_date':datetime.today().strftime("%Y/%m/%d %H:%M")})
 | 
	
		
			
				|  |  | +        except:
 | 
	
		
			
				|  |  | +            traceback.print_exc()
 | 
	
		
			
				|  |  | +    return resultobj
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def save_js_to_db(jsobj,num,keyword):
 | 
	
		
			
				|  |  | +    global store_list_table
 | 
	
		
			
				|  |  | +    global iddict
 | 
	
		
			
				|  |  | +    for r in jsobj:
 | 
	
		
			
				|  |  | +        if iddict.get(r['place_id']) is not None:
 | 
	
		
			
				|  |  | +            continue
 | 
	
		
			
				|  |  | +        r['num']=num
 | 
	
		
			
				|  |  | +        r['keyword']=keyword
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        try:
 | 
	
		
			
				|  |  | +            store_list_table.insert(r)
 | 
	
		
			
				|  |  | +        except:
 | 
	
		
			
				|  |  | +            traceback.print_exc()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def process_web_request(db, driver, area_num, keyword):
 | 
	
		
			
				|  |  | +    global prev_cnt
 | 
	
		
			
				|  |  | +    request_url = None
 | 
	
		
			
				|  |  | +    time.sleep(0.8)
 | 
	
		
			
				|  |  | +    time.sleep(3)
 | 
	
		
			
				|  |  | +    print("ppppppppp&**********************")
 | 
	
		
			
				|  |  | +    for request in driver.requests:
 | 
	
		
			
				|  |  | +        if 'search?' in request.url :
 | 
	
		
			
				|  |  | +            print('searching.....')
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        if request.response:
 | 
	
		
			
				|  |  | +            if 'search?' in request.url :
 | 
	
		
			
				|  |  | +                print('parsing js:')
 | 
	
		
			
				|  |  | +                print(request.url)
 | 
	
		
			
				|  |  | +                resp = brotli.decompress(request.response.body)
 | 
	
		
			
				|  |  | +                jstext = resp.decode('utf-8')
 | 
	
		
			
				|  |  | +                resultobj = parsing_js(jstext)
 | 
	
		
			
				|  |  | +                print("before",datetime.now())
 | 
	
		
			
				|  |  | +                print("num: "+str(area_num))
 | 
	
		
			
				|  |  | +                save_js_to_db(resultobj, area_num, keyword)
 | 
	
		
			
				|  |  | +                print("after",datetime.now())
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                aft_cnt=0
 | 
	
		
			
				|  |  | +                cursor = db.query('select count(*) as cnt from {} where num="{}" '.format(TABLE_STORE_LIST, str(area_num)))
 | 
	
		
			
				|  |  | +                for c in cursor:
 | 
	
		
			
				|  |  | +                    aft_cnt=c['cnt']
 | 
	
		
			
				|  |  | +                    break
 | 
	
		
			
				|  |  | +                db[TABLE_CONV_LOG].insert({'num':area_num,'prev':prev_cnt,'next':aft_cnt,'dt':datetime.now()})
 | 
	
		
			
				|  |  | +    del driver.requests
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def check_area_code(db, kw):
 | 
	
		
			
				|  |  | +    table_name = '{}.{}'.format(MYSQL_CONFIG['MYSQL_DB'], TABLE_AREACODES)
 | 
	
		
			
				|  |  | +    result = db.query('select distinct(kw) from {}'.format(table_name))
 | 
	
		
			
				|  |  | +    result = [i['kw'] for i in result]
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    if kw not in result:
 | 
	
		
			
				|  |  | +        try:
 | 
	
		
			
				|  |  | +            sql = 'insert into {} (select num,"{}" as kw,0 as expand from {}) '.format(table_name, kw, TABLE_LAT_LON)
 | 
	
		
			
				|  |  | +            db.query(sql) 
 | 
	
		
			
				|  |  | +        except:
 | 
	
		
			
				|  |  | +            traceback.print_exc()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def page_down_(driver, time_):
 | 
	
		
			
				|  |  | +    try:
 | 
	
		
			
				|  |  | +        # action = webdriver.ActionChains(driver)
 | 
	
		
			
				|  |  | +        # element = driver.find_element_by_css_selector('a[aria-label="清除搜尋"]')
 | 
	
		
			
				|  |  | +        # print(element)
 | 
	
		
			
				|  |  | +        # height = element.size['height']
 | 
	
		
			
				|  |  | +        # width = element.size['width']
 | 
	
		
			
				|  |  | +        # action.move_to_element(element).move_by_offset(-width, height).click().perform()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        action = webdriver.ActionChains(driver)
 | 
	
		
			
				|  |  | +        element = driver.find_element_by_css_selector('div[class="TFQHme"]')
 | 
	
		
			
				|  |  | +        action.move_to_element(element).click().perform()
 | 
	
		
			
				|  |  | +        time.sleep(1)
 | 
	
		
			
				|  |  | +        driver.back()
 | 
	
		
			
				|  |  | +        time.sleep(1)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        for i in range(time_):
 | 
	
		
			
				|  |  | +            print(i)
 | 
	
		
			
				|  |  | +            actions = ActionChains(driver)
 | 
	
		
			
				|  |  | +            actions.send_keys(Keys.END).perform()
 | 
	
		
			
				|  |  | +            
 | 
	
		
			
				|  |  | +            time.sleep(0.5)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    except:
 | 
	
		
			
				|  |  | +        traceback.print_exc()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def main():
 | 
	
		
			
				|  |  | +    global chrome_window
 | 
	
		
			
				|  |  | +    global store_list_table
 | 
	
		
			
				|  |  | +    global globalkw
 | 
	
		
			
				|  |  | +    global proxyport
 | 
	
		
			
				|  |  | +    global iddict
 | 
	
		
			
				|  |  | +    global prev_cnt
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    port=4447
 | 
	
		
			
				|  |  | +    if len(sys.argv)>1:
 | 
	
		
			
				|  |  | +        globalkw=sys.argv[1]
 | 
	
		
			
				|  |  | +        port=int(sys.argv[2])
 | 
	
		
			
				|  |  | +        proxyport=int(sys.argv[3])
 | 
	
		
			
				|  |  | +    print(globalkw, port, proxyport)
 | 
	
		
			
				|  |  | +    failcnt=0
 | 
	
		
			
				|  |  | +    localip=socket.gethostbyname(socket.gethostname())
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    db = dataset.connect('mysql://{}:{}@{}/{}?charset=utf8mb4'.format( MYSQL_CONFIG['MYSQL_USER'],
 | 
	
		
			
				|  |  | +        MYSQL_CONFIG['MYSQL_PASSWORD'], MYSQL_CONFIG['MYSQL_HOST'], MYSQL_CONFIG['MYSQL_DB']))
 | 
	
		
			
				|  |  | +    iddict = build_cache(db)
 | 
	
		
			
				|  |  | +    store_list_table = db[TABLE_STORE_LIST]
 | 
	
		
			
				|  |  | +    table2 = db[TABLE_PROGRESS_LIST]
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    if not chrome_window:
 | 
	
		
			
				|  |  | +        print('restart docker pw{}'.format(port))
 | 
	
		
			
				|  |  | +        os.system('sudo docker container restart pw'+str(port))
 | 
	
		
			
				|  |  | +        # os.system('docker container restart p'+str(port))
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        time.sleep(10)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    print('drvier start...')
 | 
	
		
			
				|  |  | +    driver = brower_start(port)
 | 
	
		
			
				|  |  | +    # check_area_code(db, globalkw)
 | 
	
		
			
				|  |  | +    area_num=None
 | 
	
		
			
				|  |  | +    if len(sys.argv) > 4 :
 | 
	
		
			
				|  |  | +        repkw = sys.argv[1]
 | 
	
		
			
				|  |  | +        repnum = sys.argv[2]
 | 
	
		
			
				|  |  | +        if 'SCAN' in repkw:
 | 
	
		
			
				|  |  | +            job = scan_job(db, repnum)
 | 
	
		
			
				|  |  | +        else:
 | 
	
		
			
				|  |  | +            job = get_next_job(db, repeat=True, repkw=repkw, repnum=repnum)
 | 
	
		
			
				|  |  | +    else:
 | 
	
		
			
				|  |  | +        job = get_next_job(db, repkw=globalkw)
 | 
	
		
			
				|  |  | +    print(job)
 | 
	
		
			
				|  |  | +    keyword  = job['kw']
 | 
	
		
			
				|  |  | +    latitude = job['lat'] #緯度
 | 
	
		
			
				|  |  | +    longitude = job['lon'] #精度
 | 
	
		
			
				|  |  | +    area_num = job['num']
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    safe_string = urllib.parse.quote_plus(keyword)
 | 
	
		
			
				|  |  | +    url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
 | 
	
		
			
				|  |  | +    print(url)
 | 
	
		
			
				|  |  | +    prev_cnt=0
 | 
	
		
			
				|  |  | +    cursor = db.query('select count(*) as cnt from {} where num="{}" '.format(TABLE_STORE_LIST, str(area_num)))
 | 
	
		
			
				|  |  | +    for c in cursor:
 | 
	
		
			
				|  |  | +        prev_cnt = c['cnt']
 | 
	
		
			
				|  |  | +        break
 | 
	
		
			
				|  |  | +    driver.get(url)
 | 
	
		
			
				|  |  | +    time.sleep(2)
 | 
	
		
			
				|  |  | +    keyin_keyword(driver, keyword)
 | 
	
		
			
				|  |  | +    page_down_(driver, 10)
 | 
	
		
			
				|  |  | +    process_web_request(db, driver, area_num, keyword)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    table2.insert({'kw':keyword,'num':job['num']},['kw'])
 | 
	
		
			
				|  |  | +    db.query('update {} set expand = 1 where num="'.format(TABLE_AREACODES)+str(job['num'])+'" and kw="'+keyword+'" ')
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +if __name__ == '__main__':
 | 
	
		
			
				|  |  | +    main()
 |