"""Render a URL/query-term network from the ``tmp`` table as an HTML page.

Running this module:

1. opens the SQLite database ``<pname>.db`` through ``dataset``;
2. reads distinct ``(q, url)`` pairs from the ``tmp`` table;
3. builds an undirected graph that links every URL to the ``q`` values
   recorded for it;
4. prunes very high-degree nodes, self-loops and isolated nodes;
5. writes ``<pname>.html`` with pyvis and a post-processed copy named
   ``mod_<pname>.html``.

NOTE(review): ``q`` looks like a search query string and ``url`` the page it
led to -- confirm against the code that fills the ``tmp`` table.
"""
import codecs
import copy
import operator
import os
import pickle
import sys
import traceback

import dataset
import jieba
import networkx as nx
import pandas as pd
import pyvis
import searchconsole
from instaloader import Instaloader, Profile
from pyvis.network import Network
# import pysftp  # was only referenced by a disabled SFTP upload step

# Data-set name.  It selects both the SQLite file (<pname>.db) and the name of
# the generated HTML file, so it is defined exactly once.  Earlier revisions
# listed these alternatives as commented-out assignments: cont, damanwoo,
# drama, news, www, ipromise, sports, rumor, korea.
pname = 'hhh.rb'

db = dataset.connect('sqlite:///' + pname + ".db")
table = db['tmp']
rid = 0

# Site prefixes removed from every URL, in this order, so node labels stay
# short.
_URL_PREFIXES = (
    'https://www.hhh.com.tw',
    'https://hhh.com.tw',
    'https://m.hhh.com.tw',
)

# Nodes with more neighbours than this are dropped from the graph.
_MAX_NODE_DEGREE = 15

# Distinct (q, url) pairs, restricted to URLs that have more than 4 and fewer
# than 9 rows whose q is longer than 2 characters.
_PAIR_QUERY = (
    'select distinct q,url from tmp where url in '
    '(select distinct url from  '
    '(select url,count(q) from tmp where length(q)> 2 group by url   '
    'having count(q) >4 and count(q)<9 ) as tbl1) order by url'
)


def get_css():
    """Return the text of ``jared/data/css.txt`` with its lines joined by spaces.

    Raises:
        OSError: if the file cannot be opened.
    """
    # ``with`` guarantees the handle is closed even when reading fails.
    with codecs.open('jared/data/css.txt', 'r', 'utf-8') as fr:
        return ' '.join(fr.readlines())


def modify_file(fname):
    """Write a post-processed copy of the HTML file *fname*.

    The copy is stored next to the input with ``mod_`` prepended to the file
    name.  Every input line is copied unchanged; additionally

    * after a line with ``<body>`` in its first 10 characters, an opening
      ``<div id="google">`` is inserted;
    * after a line with ``<style type="text`` in its first 22 characters, the
      extra CSS is inserted (currently an empty string);
    * after a line with ``<div id = "mynetwork"`` in its first 30 characters,
      a closing ``</div>`` is inserted.

    Args:
        fname: path of the HTML file to read.

    Raises:
        OSError: if *fname* cannot be read or the copy cannot be written.
    """
    with codecs.open(fname, 'r', 'utf-8') as fr:
        lines = fr.readlines()

    # CSS injection is switched off; assign get_css() here to turn it on.
    css = ''

    parts = []
    for line in lines:
        parts.append(line)
        # At most one insertion per line, checked in this order.
        if '<body>' in line[0:10]:
            parts.append('\n<div id="google">\n')
        elif '<style type="text' in line[0:22]:
            parts.append("\n" + css + "\n")
        elif '<div id = "mynetwork"' in line[0:30]:
            parts.append('\n</div>\n')

    # Prefix the file name only, so a path such as "out/x.html" becomes
    # "out/mod_x.html".  For a bare file name this is simply "mod_" + fname.
    out_path = os.path.join(os.path.dirname(fname),
                            "mod_" + os.path.basename(fname))
    with codecs.open(out_path, 'w', 'utf-8') as fw:
        fw.write(''.join(parts))


def gen_pic():
    """Build the URL/term graph from the database and render it to HTML.

    Reads the module-level ``db`` and ``pname``.  Writes ``<pname>.html``
    through pyvis and then ``mod_<pname>.html`` through :func:`modify_file`.
    Progress counters are printed to stdout.

    Returns:
        list: always an empty list.  Nothing is ever added to it; it is
        returned only so that the function's result type stays the same.
    """
    finallist = []
    graph = nx.Graph()

    # Collect the q values recorded for each (shortened) URL.
    urldict = {}
    for row in db.query(_PAIR_QUERY):
        url = row['url']
        for prefix in _URL_PREFIXES:
            url = url.replace(prefix, '')
        urldict.setdefault(url, []).append(row['q'])
    print(len(urldict))

    # Link each URL to its terms; URLs with a single term are skipped.
    cnt = 0
    for url, terms in urldict.items():
        if len(terms) < 2:
            continue
        for term in terms:
            graph.add_edge(url, term, weight=3, width=3, borderwidth=3)
            cnt += 1
            if cnt % 1000 == 0:
                print(cnt)

    # Prune: over-connected nodes first, then self-loops, then whatever has
    # been left without any neighbour.
    crowded = [node for node in graph
               if graph.degree(node) > _MAX_NODE_DEGREE]
    graph.remove_nodes_from(crowded)
    graph.remove_edges_from(nx.selfloop_edges(graph))
    graph.remove_nodes_from(list(nx.isolates(graph)))

    net = Network(height="600px", width="100%", bgcolor="#444444",
                  font_color="white")
    net.from_nx(graph)
    net.show(pname + '.html')
    modify_file(pname + '.html')

    # NOTE(review): a commented-out SFTP upload used to follow here and
    # contained a plaintext password.  It has been removed; that credential
    # should be treated as exposed, and any future upload step should read
    # its credentials from the environment instead of the source file.
    return finallist


# The graph is generated as soon as the module is executed or imported.
gen_pic()

# Reference snippet (JavaScript) kept from the original file.  It copies the
# label of a clicked node to the clipboard.  Nothing in this script inserts it
# into the generated page.
#
#     network.on('click', function(properties) {
#         var ids = properties.nodes;
#         var clickedNodes = nodes.get(ids);
#         var copyText = clickedNodes[0].label;
#         var promise = navigator.clipboard.writeText(copyText);
#     });