designer_gen.py 3.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. #!/usr/bin/python3
  2. import os
  3. import codecs
  4. import dataset
  5. def extract_comp(fname):
  6. resultstr=""
  7. fr=codecs.open(fname,'r','utf-8')
  8. lines=fr.readlines()
  9. inloop=False
  10. for l in lines:
  11. if inloop:
  12. # if '<script async defer crossorigin="anonymous" src="https://connect.' in l:
  13. # break
  14. if 'class="footer pt-1 d-none d-lg-block"' in l and 'footer' in l:
  15. break
  16. resultstr+=l+"\n"
  17. if 'class="dBlock"' in l and 'section' in l:
  18. # if 'row justify-content-between mt-5' in l:
  19. inloop=True
  20. resultstr+=l+"\n"
  21. continue
  22. return resultstr
  23. #<script async defer crossorigin="anonymous" src="https://connect.facebook.net/
  24. def proc_file(fname,fname2,compcontent):
  25. fr=codecs.open(fname,'r','utf-8')
  26. fw=codecs.open(fname2,'w','utf-8')
  27. lines=fr.readlines()
  28. scriptflag=False
  29. for l in lines:
  30. # <div class="row mt-5" id="row">
  31. if 'div' in l and 'class' in l and 'row mt-5' in l and 'id="row"' in l:
  32. fw.write('<div class="row mt-5" id="row_intro" style="display:none">\n')
  33. fw.write(compcontent)
  34. fw.write('</div>\n')
  35. fw.write('<div class="row mt-5" id="row_company">\n')
  36. if 'nav-link' in l and '設計師作品' in l and 'class' in l:
  37. fw.write('<a class="nav-link " onclick="c_intro();">設計師作品</a>')
  38. continue
  39. if 'nav-link' in l and '設計師公司簡介' in l and 'class' in l:
  40. fw.write('<a class="nav-link " onclick="c_comp();">設計師公司簡介</a>')
  41. continue
  42. fw.write(l)
  43. if '</script>' in l and not scriptflag:
  44. scriptflag=True
  45. fw.write("\n <script>")
  46. fw.write("function c_intro(){ \n")
  47. fw.write("var rintro= document.getElementById('row_intro'); \n")
  48. fw.write("var rcomp = document.getElementById('row_company'); \n")
  49. fw.write(' rintro.style.display="block"; \n')
  50. fw.write(' rcomp.style.display = "none"; \n')
  51. fw.write("} \n")
  52. fw.write("function c_comp(){ \n")
  53. fw.write("var rintro= document.getElementById('row_intro'); \n")
  54. fw.write("var rcomp = document.getElementById('row_company'); \n")
  55. fw.write(' rintro.style.display="none"; \n')
  56. fw.write(' rcomp.style.display = "block"; \n')
  57. fw.write("} </script> \n")
  58. #
  59. # print(lines)
  60. fr.close()
  61. fw.close()
  62. db = dataset.connect('mysql://hhh7796hhh:lYmWsu^ujcA1@hhh-v57-cluster.cluster-cmab1ctkglka.ap-northeast-2.rds.amazonaws.com:3306/xoops?charset=utf8mb4')
  63. dir_prefix='/var/www/jared-six/'
  64. cursor=db.query('SELECT hdesigner_id FROM xoops._hdesigner where onoff=1; ')
  65. for c in cursor:
  66. did=c['hdesigner_id']
  67. print(did)
  68. curdir=dir_prefix+str(did)
  69. if not os.path.exists(curdir):
  70. os.mkdir(curdir)
  71. os.chdir(curdir)
  72. os.system('curl https://hhh.com.tw/designers/cases/'+str(did)+'/1-page/new-sort/ -o orig.html')
  73. os.system('curl https://hhh.com.tw/designers/details/'+str(did)+'/ -o comp.html')
  74. result=extract_comp(curdir+"/comp.html")
  75. print(result)
  76. proc_file(curdir+"/orig.html",curdir+"/index.html",result)
  77. break
  78. #for l in lst:
  79. # os.chdir(curdir)
  80. # os.system('curl https://hhh.com.tw/columns/detail/'+l+'/ > index.html')