main.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. from pydoc import HTMLDoc
  2. from fastapi import FastAPI
  3. import dataset
  4. import sys
  5. import os
  6. import time
  7. from fastapi.middleware.cors import CORSMiddleware
  8. from fastapi.staticfiles import StaticFiles
  9. from pydantic import BaseModel
  10. from fastapi import FastAPI, Form, Response
  11. import subprocess
  12. import suggests
  13. from typing import Optional
  14. import networkx as nx
  15. import pyvis
  16. import time
  17. from pyvis.network import Network
  18. import pickle
  19. import logging
  20. import threading
  21. import random
  22. import string
  23. from fastapi.responses import HTMLResponse
  24. from fastapi.responses import RedirectResponse
  25. import dataset
  26. from selenium import webdriver
  27. import traceback
  28. import time
  29. from selenium.webdriver.common.keys import Keys
  30. from selenium.webdriver.common.by import By
  31. from selenium.webdriver.chrome.service import Service
  32. <<<<<<< HEAD
  33. # import pymysql
  34. # pymysql.install_as_MySQLdb()
  35. =======
  36. import pymysql
  37. pymysql.install_as_MySQLdb()
  38. >>>>>>> 1e549144883c5497a3deb5317546a98a9e9c2890
  39. driver = None
  40. def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
  41. return ''.join(random.choice(chars) for _ in range(size))
# The FastAPI application; every route in this file is registered on it.
app = FastAPI()
# CORS policy: any origin, any method and any header is accepted, and
# credentials are allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive -- confirm this is intended outside of development.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Module-level connection to the `gtrends` database, opened when this module
# is imported.  thread_function() writes to its `gen_graph` table.
# NOTE(review): the user name and password are embedded in the URL literal.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
# Earlier variant that served a `static` directory relative to the working
# directory; superseded by the absolute path below.
# app.mount("/web", StaticFiles(directory="static"), name="static")
# Serve the files under the absolute static directory at the /web prefix.
# NOTE(review): thread_function() saves graphs to the *relative* path
# 'static/gs/', so the two only line up when the process is started from
# /root/src/kw_tools/web -- confirm.
app.mount("/web", StaticFiles(directory="/root/src/kw_tools/web/static"), name="static")
  54. def thread_function(kw):
  55. global db
  56. print(kw)
  57. G = nx.Graph()
  58. for k in kw:
  59. s = suggests.suggests.get_suggests(k, source='google')
  60. for sg in s['suggests']:
  61. G.add_edge(k,sg,weight=1)
  62. print(sg)
  63. time.sleep(1)
  64. s2 = suggests.suggests.get_suggests(k, source='google')
  65. for elmt in s2['suggests']:
  66. G.add_edge(sg,elmt,weight=1)
  67. # G.remove_nodes_from(list(nx.isolates(G)))
  68. G.remove_edges_from( list(nx.selfloop_edges(G)))
  69. # pickle.dump( G, open( "gs2.p", "wb" ) )
  70. pyG = Network(height="750px", width="100%",bgcolor="#333333",font_color="white")
  71. pyG.from_nx(G)
  72. id=id_generator()
  73. db['gen_graph'].insert({'filename':str(id),'kw':str(kw)})
  74. # pyG.save_graph('gstest')
  75. # pyG.show('static/gs/'+str(id)+'.html')
  76. pyG.save_graph('static/gs/'+str(id)+'.html')
  77. @app.get("/tree_list/",response_class=HTMLResponse)
  78. async def tree_list():
  79. # global db
  80. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
  81. html="<html><body><h2>清單</h2></br>請一分鐘後refresh </br></br>"
  82. html+="<table border='1'>"
  83. cursor=db.query('select filename,kw from gen_graph order by id desc')
  84. cnt=0
  85. for c in cursor:
  86. html+="<tr><td>"+c['kw']+"</td>"
  87. html+="<td><a href='/web/gs/"+c['filename']+".html'>"+c['filename']+"</a></td></tr>"
  88. cnt+=1
  89. if cnt > 10:
  90. break
  91. html+="</table></body></html>"
  92. return html
  93. @app.post("/proj_kw/",response_class=HTMLResponse)
  94. async def proj_kw(proj: str = Form(...),kws:Optional[str] = Form(None)):
  95. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  96. table=db['serp_jobs']
  97. for kw in kws:
  98. table.insert({'proj':proj,'kw':kw})
  99. return "OK請稍後"
  100. #response_class=RedirectResponse
  101. @app.post("/gen_tree/",response_class=HTMLResponse)
  102. async def func_expand(kw: str = Form(...),kw2:Optional[str] = Form(None),kw3:Optional[str] = Form(None),kw4:Optional[str] = Form(None) ):
  103. kwlst=[]
  104. if len(kw)>1:
  105. kwlst.append(kw)
  106. if kw2 is not None:
  107. kwlst.append(kw2)
  108. if kw3 is not None:
  109. kwlst.append(kw3)
  110. if kw4 is not None:
  111. kwlst.append(kw4)
  112. x = threading.Thread(target=thread_function, args=(kwlst,))
  113. x.start()
  114. # return "ok"
  115. return RedirectResponse(url="/tree_list",status_code=302)
  116. # return HTMLResponse('<html><head><meta http-equiv="refresh" content="0; URL="/tree_list" /></head></html>')
  117. def restart_browser():
  118. os.system('docker container stop p4444')
  119. time.sleep(1)
  120. os.system('docker container rm p4444')
  121. time.sleep(1)
  122. os.system('docker run -d -p ' + '4458' + ':4444 --name p4444 --add-host=host.docker.internal:host-gateway --shm-size="2g" selenium/standalone-chrome:103.0')
  123. time.sleep(18)
  124. options = webdriver.ChromeOptions()
  125. options.add_argument("--no-sandbox")
  126. options.add_argument("--disable-dev-shm-usage")
  127. options.add_argument('--headless')
  128. <<<<<<< HEAD
  129. s = Service('/root/drivers/102/chromedriver')
  130. driver = webdriver.Chrome(options=options,service=s)
  131. =======
  132. ddriver = webdriver.Remote(command_executor='http://127.0.0.1:4458/wd/hub',options=options)
  133. >>>>>>> 1e549144883c5497a3deb5317546a98a9e9c2890
  134. driver.set_window_size(950, 20000)
  135. return driver
  136. @app.post("/ranking/")
  137. async def ranking(kw: str = Form(...), domain:str = Form(...),kw2:Optional[str] = Form(None),domain2:Optional[str] = Form(None),kw3:Optional[str] = Form(None),domain3:Optional[str] = Form(None),kw4:Optional[str] = Form(None),domain4:Optional[str] = Form(None),kw5:Optional[str] = Form(None),domain5:Optional[str] = Form(None)):
  138. kwlst = []
  139. kwlst.append([kw,domain])
  140. if kw2 is not None:
  141. kwlst.append([kw2,domain2])
  142. if kw3 is not None:
  143. kwlst.append([kw3,domain3])
  144. if kw4 is not None:
  145. kwlst.append([kw4,domain4])
  146. if kw5 is not None:
  147. kwlst.append([kw5,domain5])
  148. result = []
  149. for i in kwlst:
  150. driver = restart_browser()
  151. # escaped_search_term=urllib.parse.quote(term)
  152. googleurl = 'https://www.google.com/?num=100'
  153. driver.get(googleurl)
  154. time.sleep(6)
  155. send_kw_elmt = driver.find_element(By.XPATH,
  156. '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
  157. send_kw_elmt.send_keys(i[0])
  158. time.sleep(3)
  159. send_kw_elmt.send_keys(Keys.ENTER)
  160. time.sleep(6)
  161. elmts = driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
  162. cnt = 1
  163. datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
  164. domain_name = i[1]
  165. for elmt in elmts:
  166. try:
  167. href = elmt.get_attribute('href')
  168. if domain_name in href:
  169. datadict['搜尋詞'].append(i[0])
  170. datadict['結果標題'].append(elmt.text)
  171. datadict['結果網址'].append(href)
  172. datadict['結果名次'].append(str(cnt))
  173. cnt += 1
  174. except:
  175. print('href2 exception')
  176. traceback.print_exc()
  177. result.append(datadict)
  178. print(domain_name)
  179. print(datadict)
  180. driver.quit()
  181. print('數量',len(elmts))
  182. time.sleep(30)
  183. # return "ok"
  184. # return RedirectResponse(url="/ranking_result",)
  185. html = f"<html><body>{result}</body></html>"
  186. return html
  187. @app.get("/ranking_result/")
  188. async def tree_list():
  189. html = "<table border='1'>"
  190. # html += "<tr><td>" + c['kw'] + "</td>"
  191. return html