#!/usr/bin/python3
# choozmo/hhh_staticpage_tools
import codecs
import os
import subprocess

import dataset


def extract_comp(fname):
    """Return the company-profile ``<article>`` markup found in *fname*.

    The file is read as UTF-8 and scanned line by line.  Capturing starts at
    the first line that contains both ``class="dAbout"`` and ``article`` and
    stops after the first later line that contains ``/article>``.  If no
    closing line is found, everything up to the end of the file is captured.

    Args:
        fname: Path of the HTML file to scan.

    Returns:
        The captured lines joined into one string.  Every captured line
        except the closing one has an extra newline character appended (on
        top of the line ending it already carries).  An empty string is
        returned when no opening line is found.
    """
    # The context manager closes the handle even if decoding fails.
    with codecs.open(fname, 'r', 'utf-8') as fr:
        lines = fr.readlines()

    captured = []
    capturing = False
    for line in lines:
        if not capturing:
            if 'class="dAbout"' in line and 'article' in line:
                capturing = True
                captured.append(line + "\n")
            continue

        if '/article>' in line:
            # The closing line is kept as-is and ends the capture.
            captured.append(line)
            break

        # Inside the article: each line is appended exactly once, even if it
        # happens to match the opening marker again.
        captured.append(line + "\n")

    return "".join(captured)


#<script async defer crossorigin="anonymous" src="https://connect.facebook.net/
# Inline JavaScript written right after the first line containing </script>.
# It defines c_intro() / c_comp(), which show one of the two injected
# containers (row_intro / row_company) and hide the other.
_TOGGLE_SCRIPT = (
    "\n <script>"
    "function c_intro(){ \n"
    "var rintro= document.getElementById('row_intro'); \n"
    "var rcomp =  document.getElementById('row_company'); \n"
    ' rintro.style.display="block";  \n'
    ' rcomp.style.display = "none";  \n'
    "} \n"
    "function c_comp(){ \n"
    "var rintro= document.getElementById('row_intro'); \n"
    "var rcomp =  document.getElementById('row_company'); \n"
    ' rintro.style.display="none";  \n'
    ' rcomp.style.display = "block";  \n'
    "} </script> \n"
)


def proc_file(fname, fname2, compcontent):
    """Rewrite the HTML page *fname* into *fname2* with a company-info toggle.

    The input is read as UTF-8 and copied line by line to the UTF-8 output
    with these changes:

    * Before a line that looks like ``<div class="row mt-5" id="row">`` a
      hidden ``row_company`` div holding *compcontent* is written, followed
      by the opening tag of a ``row_intro`` div.  The matching line itself is
      still copied afterwards.
    * A ``nav-link`` line mentioning ``設計師作品`` or ``設計師公司簡介`` is
      replaced entirely by an anchor calling ``c_intro()`` / ``c_comp()``
      (written without a trailing newline).
    * After the first line containing ``</script>`` the toggle script is
      appended once.

    Args:
        fname: Path of the source HTML file.
        fname2: Path of the file to write; may be the same as *fname*.
        compcontent: Markup placed inside the hidden ``row_company`` div.
    """
    # Read everything before opening the output so that fname == fname2 does
    # not truncate the input, and so a read error leaves fname2 untouched.
    with codecs.open(fname, 'r', 'utf-8') as fr:
        lines = fr.readlines()

    script_written = False
    with codecs.open(fname2, 'w', 'utf-8') as fw:
        for line in lines:
            # Target line: <div class="row mt-5" id="row">
            if 'div' in line and 'class' in line and 'row mt-5' in line and 'id="row"' in line:
                fw.write('<div class="row mt-5" id="row_company" style="display:none">\n')
                fw.write(compcontent)
                fw.write('</div>\n')
                # NOTE(review): no closing tag for row_intro is written by
                # this function; confirm the resulting nesting is intended.
                fw.write('<div class="row mt-5" id="row_intro">\n')

            if 'nav-link' in line and '設計師作品' in line and 'class' in line:
                fw.write('<a class="nav-link "  onclick="c_intro();">設計師作品</a>')
                continue

            if 'nav-link' in line and '設計師公司簡介' in line and 'class' in line:
                fw.write('<a class="nav-link "  onclick="c_comp();">設計師公司簡介</a>')
                continue

            fw.write(line)
            if '</script>' in line and not script_written:
                script_written = True
                fw.write(_TOGGLE_SCRIPT)

# Driver: for every designer flagged onoff=1, download the case-list page and
# the details page, then build index.html from the two.
#
# The connection URL can be supplied through the HHH_DB_URL environment
# variable.
# SECURITY(review): the fallback URL below embeds database credentials in
# source control.  Rotate this password and remove the fallback once the
# deployment provides HHH_DB_URL.
db = dataset.connect(os.environ.get(
    'HHH_DB_URL',
    'mysql://hhh7796hhh:lYmWsu^ujcA1@hhh-v57-cluster.cluster-cmab1ctkglka.ap-northeast-2.rds.amazonaws.com:3306/xoops?charset=utf8mb4',
))
dir_prefix='/var/www/jared-six/'

cursor=db.query('SELECT hdesigner_id FROM xoops._hdesigner where onoff=1; ')
for c in cursor:
    did=c['hdesigner_id']
    print(did)
    curdir=dir_prefix+str(did)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(curdir, exist_ok=True)

    orig_path=os.path.join(curdir, 'orig.html')
    comp_path=os.path.join(curdir, 'comp.html')
    downloads=(
        ('https://hhh.com.tw/designers/cases/'+str(did)+'/1-page/new-sort/', orig_path),
        ('https://hhh.com.tw/designers/details/'+str(did)+'/', comp_path),
    )

    # curl is invoked with an argument list (no shell), writing straight to
    # explicit paths, so no change of working directory is needed.
    download_ok=True
    for url, dest in downloads:
        if subprocess.run(['curl', url, '-o', dest]).returncode != 0:
            download_ok=False
            break
    if not download_ok:
        # Skip this designer rather than processing missing or stale files.
        print('download failed for designer '+str(did)+', skipping')
        continue

    result=extract_comp(comp_path)
    print(result)
    proc_file(orig_path, os.path.join(curdir, 'index.html'), result)
#    break


#for l in lst:
#    os.chdir(curdir)
#    os.system('curl https://hhh.com.tw/columns/detail/'+l+'/ > index.html')