@@ -7,7 +7,7 @@ import time
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
-from fastapi import FastAPI, Form, Response
+from fastapi import FastAPI, Form, Response, File, UploadFile, Request
import subprocess
import suggests
from typing import Optional
@@ -20,17 +20,27 @@ import logging
import threading
import random
import string
-from fastapi.responses import HTMLResponse
-from fastapi.responses import RedirectResponse
+from fastapi.responses import HTMLResponse,RedirectResponse, FileResponse
import dataset
-from selenium import webdriver
import traceback
import time
+from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
-# import pymysql
-# pymysql.install_as_MySQLdb()
+import networkx as nx
+from pyvis.network import Network
+import csv
+import sys
+import codecs
+import difflib
+import pymysql
+from pathlib import Path
+from tempfile import NamedTemporaryFile
+from typing import Callable
+import shutil
+import aiofiles
driver = None
@@ -50,8 +60,8 @@ app.add_middleware(
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/gtrends?charset=utf8mb4')
-# app.mount("/web", StaticFiles(directory="static"), name="static")
-app.mount("/web", StaticFiles(directory="/root/src/kw_tools/web/static"), name="static")
+# NOTE(review): the static directory is a hard-coded absolute path; the
+# previously used path is kept in the comment below. Confirm which one the
+# deployment target actually has before merging.
+app.mount("/web", StaticFiles(directory="/Users/zooeytsai/kw_tools/web/static"), name="static")
+# app.mount("/web", StaticFiles(directory="/root/src/kw_tools/web/static"), name="static")
def thread_function(kw):
global db
@@ -217,3 +227,111 @@ async def tree_list():
# html += "<tr><td>" + c['kw'] + "</td>"
return html
+# Module-level graph; kwtree() adds its cluster edges to this shared instance.
+G = nx.Graph()
+def gcm0(strings, n=8, cutoff=0.65):
+    """Greedily group similar strings into clusters keyed by a representative.
+
+    Each string is stripped of surrounding whitespace and compared, using
+    ``difflib.get_close_matches``, against the keys of the clusters built so
+    far. It joins the best-matching cluster, or starts a new cluster keyed
+    by itself when no key scores at least ``cutoff``.
+
+    Args:
+        strings: Iterable of strings to cluster. Order matters: earlier
+            strings become the cluster keys later ones are matched against.
+        n: Maximum number of candidates requested from
+            ``get_close_matches``; only the best one is used.
+        cutoff: Minimum similarity ratio (0..1) required to join a cluster.
+
+    Returns:
+        dict mapping each cluster key to the list of its members; the key
+        itself is always the first member.
+    """
+    clusters = {}
+    # Named ``text`` so the ``string`` module imported by this file is not
+    # shadowed inside the function.
+    for text in (s.strip() for s in strings):
+        match = difflib.get_close_matches(text, clusters, n, cutoff)
+        if match:
+            clusters[match[0]].append(text)
+        else:
+            clusters[text] = [text]
+    return clusters
+def proc_row(row, counts=None):
+    """Count the space-separated tokens of ``row`` into a frequency mapping.
+
+    Args:
+        row: Text to tokenize. It is split on single spaces, so consecutive
+            spaces yield empty-string tokens, which are counted as well.
+        counts: Mapping updated in place with token -> occurrence count.
+            When omitted, the module-level ``kwdict`` is used, which is the
+            behavior callers passing a single argument already rely on.
+
+    Raises:
+        NameError: If ``counts`` is omitted and no module-level ``kwdict``
+            has been defined.
+    """
+    if counts is None:
+        counts = kwdict
+    for token in row.split(' '):
+        counts[token] = counts.get(token, 0) + 1
+def save_upload_file_tmp(file: UploadFile) -> Path:
+    """Copy an uploaded file to a named temporary file and return its path.
+
+    The temporary file keeps the upload's filename suffix and is created
+    with ``delete=False``, so the caller is responsible for removing it.
+    The upload's underlying file object is always closed before returning.
+
+    Args:
+        file: The upload to persist; ``file.file`` is read to exhaustion.
+
+    Returns:
+        Path of the temporary file holding the uploaded bytes.
+
+    Raises:
+        Exception: Whatever the copy raises is re-raised after the partially
+            written temporary file has been removed.
+    """
+    tmp_path = None
+    try:
+        # ``filename`` can be missing on an upload; fall back to no suffix
+        # instead of failing inside Path().
+        suffix = Path(file.filename or "").suffix
+        with NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+            tmp_path = Path(tmp.name)
+            shutil.copyfileobj(file.file, tmp)
+    except Exception:
+        # delete=False means nobody else will clean up a half-written file.
+        if tmp_path is not None and tmp_path.exists():
+            tmp_path.unlink()
+        raise
+    finally:
+        file.file.close()
+    return tmp_path
+async def kwtree(file: UploadFile = File(...)):
+    """Render a keyword-similarity graph for a keyword export file.
+
+    The upload's filename is looked up under ``/Users/zooeytsai/Downloads/``
+    and that file is read as tab-separated UTF-16 text. The first column of
+    every data row (the header row is skipped) is taken as a keyword.
+    Keywords are clustered with ``gcm0``, each member is linked to its
+    cluster key in the module-level graph ``G``, similar cluster keys are
+    linked to each other, and the graph is written with pyvis to
+    ``news<N>.html``.
+
+    Args:
+        file: Uploaded file; only its ``filename`` is used here, the
+            uploaded bytes themselves are not read.
+
+    Returns:
+        FileResponse serving the generated HTML page.
+    """
+    # Keep only the final path component so a crafted filename such as
+    # "../../x" cannot point outside the directory.
+    destination_file_path = "/Users/zooeytsai/Downloads/" + Path(file.filename or "").name
+    rowlst = []
+    seen = set()
+    with codecs.open(destination_file_path, 'r', 'utf-16') as out_file:
+        csvfile = csv.reader(out_file, delimiter='\t', quotechar='|')
+        next(csvfile, None)  # skip the header row
+        for row in csvfile:
+            if not row:
+                continue  # blank line: there is no first column to read
+            keyword = row[0]
+            # NOTE(review): proc_row counts into a module-level ``kwdict``;
+            # confirm that global exists, nothing in this function defines it.
+            proc_row(keyword)
+            # Compare the keyword itself (not the whole row) so duplicates
+            # are really skipped.
+            if keyword not in seen:
+                seen.add(keyword)
+                rowlst.append(keyword)
+    clusters = gcm0(rowlst)
+    # NOTE(review): G is shared at module level and is not cleared here, so
+    # edges added by earlier calls remain in the rendered graph.
+    for key, members in clusters.items():
+        for member in members:
+            G.add_edge(key, member, weight=1, label='')
+    keys = list(clusters)
+    from strsimpy.qgram import QGram
+    qgram = QGram(2)
+    # Each cluster key is linked to at most one other similar key.
+    paired = set()
+    for k1 in keys:
+        for k2 in keys:
+            if k1 == k2 or k1 in paired or k2 in paired:
+                continue
+            if qgram.distance(k1, k2) <= 12:
+                paired.update((k1, k2))
+                G.add_edge(k1, k2, weight=1, label='')
+    pyG = Network(height="100%", width="100%", bgcolor="#444444", font_color="white")
+    pyG.set_options("""
+ const options = {
+ "nodes" : {
+ "font" : {
+ "size" : "30",
+ "color" : "#ffffff"
+ }
+ },
+ "physics": {
+ "forceAtlas2Based": {
+ "springLength": 100
+ },
+ "maxVelocity": 150,
+ "minVelocity": 0.28,
+ "solver": "forceAtlas2Based"
+ }
+ }
+ """)
+    G.remove_edges_from(nx.selfloop_edges(G))
+    pyG.from_nx(G)
+    # pyG.show_buttons(filter_=['physics'])
+    news_file = random.randint(0,100)
+    # NOTE(review): the page is written relative to the working directory but
+    # served from an absolute path below; the two only agree when the process
+    # runs from that directory.
+    pyG.show(f'news{news_file}.html')
+    return FileResponse(f'/Users/zooeytsai/kw_tools/choozmo/news{news_file}.html',media_type='text/html')