Your Name 3 years ago
parent
commit
29295bad34

+ 161 - 0
python_utils/GoogleNews2.py

@@ -0,0 +1,161 @@
+#import urllib.request
+import urllib
+import requests
+import traceback
+from bs4 import BeautifulSoup as Soup
+#from fp.fp import FreeProxy
+import socks
+import ssl
+import socket
class GoogleNews:
    """Minimal scraper for Google News search results and the
    news.google.com front page.

    Scraped data accumulates across calls in ``texts``, ``links`` and
    ``results``; call :meth:`clear` to reset them before a new query.
    """

    def __init__(self, proxy_host='172.104.67.159', proxy_port=8180):
        # Accumulators appended to by getpage()/get_news().
        self.texts = []
        self.links = []
        self.results = []
        self.user_agent = ('Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:64.0) '
                           'Gecko/20100101 Firefox/64.0')
        self.headers = {'User-Agent': self.user_agent}
        # SOCKS5 proxy used by getpage(); configurable instead of being
        # hard-coded inside the request path (defaults keep old behaviour).
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port

    def search(self, key):
        """Search Google News for *key* and scrape the first result page.

        Stores the raw query string; URL-encoding happens exactly once in
        getpage().  (The old code pre-joined words with '+' and then
        urlencoded them, so multi-word queries contained a literal '%2B'.)
        """
        self.key = key
        self.getpage()

    def getpage(self, page=1):
        """Fetch result page *page* (1-based) of the current search.

        Appends each hit's link to ``self.links`` and a dict to
        ``self.results``.  Network errors are printed and swallowed,
        matching the original best-effort behaviour.
        """
        # Build the query locally instead of overwriting self.key: the old
        # code re-encoded self.key on every call, corrupting it for page > 1.
        query = urllib.parse.urlencode({'q': self.key})
        self.url = ("https://www.google.com/search?" + query
                    + "&tbm=nws&start=%d" % (10 * (page - 1)))
        try:
            req = urllib.request.Request(self.url, headers=self.headers)

            # NOTE(review): certificate verification is disabled, presumably
            # so the request survives the SOCKS proxy below.  This is
            # insecure -- confirm it is really required.
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE

            # HACK: monkey-patches the global socket module so urllib goes
            # through the SOCKS5 proxy.  This affects every socket in the
            # process, not just this request.
            socks.set_default_proxy(socks.SOCKS5, self.proxy_host,
                                    self.proxy_port)
            socket.socket = socks.socksocket

            self.response = urllib.request.urlopen(req, context=ctx)
            try:
                self.page = self.response.read().decode('utf-8')
            finally:
                # Close even if decode fails (the old code leaked on error).
                self.response.close()
            self.content = Soup(self.page, "html.parser")

            # Each news hit lives in a div with class "dbsr".
            for item in self.content.find_all("div", class_="dbsr"):
                try:
                    # find() returns None when the tag is absent, making
                    # .get() raise AttributeError -- the only error expected
                    # here, so catch exactly that instead of a bare except.
                    link = item.find("a").get("href")
                except AttributeError:
                    continue  # hit without a link: skip it
                self.links.append(link)
                try:
                    img = item.find("img").get("src")
                except AttributeError:
                    img = None
                # The old code computed img and then dropped it; include it
                # alongside the existing keys (backward-compatible addition).
                self.results.append(
                    {"title": 'title', 'link': link, 'img': img})
        except Exception as e:
            # Best-effort scraper: report and carry on (original behaviour).
            traceback.print_exc()
            print(e)

    def get_news(self, deamplify=False):
        """Scrape the news.google.com front page into texts/links/results.

        When *deamplify* is true, try to recover the canonical (non-AMP)
        URL from the anchor's ``jslog`` attribute, falling back to the
        plain href.
        """
        self.url = 'https://news.google.com/'
        try:
            self.response = requests.get(self.url)
            # BUG FIX: the old code called self.response.read(), which does
            # not exist on a requests.Response; the AttributeError was
            # silently swallowed and the method never scraped anything.
            self.page = self.response.text
            self.content = Soup(self.page, "html.parser")
            for item in self.content.find_all("article"):
                try:
                    title = item.find("h3").text
                    if deamplify:
                        try:
                            link = (item.find("a").get("jslog")
                                    .split('2:')[1].split(';')[0])
                        except Exception as e:
                            print(e)
                            link = item.find("h3").find("a").get("href")
                    else:
                        link = item.find("h3").find("a").get("href")
                    self.texts.append(title)
                    self.links.append(link)
                    self.results.append(
                        {'title': title,
                         'datetime': item.find("time").get("datetime"),
                         'time': item.find("time").text,
                         'desc': item.find("h3").next_sibling.text,
                         'link': link,
                         'media': None,
                         'img': item.previous_sibling.find("img").get("src")})
                except AttributeError:
                    # A sub-tag was missing in this article: skip it.
                    continue
            self.response.close()
        except Exception as e:
            traceback.print_exc()
            print(e)

    def result(self):
        """Return the accumulated result dicts."""
        return self.results

    def gettext(self):
        """Return the accumulated titles."""
        return self.texts

    def getlinks(self):
        """Return the accumulated links."""
        return self.links

    def clear(self):
        """Reset all accumulators so a new search starts from scratch."""
        self.texts = []
        self.links = []
        self.results = []
+
+

+ 9 - 0
webSite/content/trenddetail/20210726-detail.md

@@ -0,0 +1,9 @@
++++
+title = "趨勢日報"
+date = "2021-07-26T00:39:46+02:00"
+layout = "trenddetail"
+banner = "img/banners/banner-3.jpg"
+title1 = "custom variables"
+link1 = "第二名測試"
+desc1 = "第二名測試"
++++

+ 30 - 0
webSite/content/trends/20210726.md

@@ -0,0 +1,30 @@
++++
+title = "趨勢日報"
+date = "2021-07-26T00:39:46+02:00"
+tags = ["daily-trend-watch"]
+categories = ["trends"]
+layout = "trends"
+banner = "img/banners/banner-3.jpg"
+no1 = "custom variables"
+no2 = "第二名測試"
+no3 = "第3名測試"
+no4 = "第4名測試"
+no5 = "第5名測試"
+no6 = "第6名測試"
+no7 = "第7名測試"
+no8 = "第8名測試"
+no9 = "第9名測試"
+no10 = "第10名測試"
+no11 = "第11名測試"
+no12 = "第12名測試"
+no13 = "第13名測試"
+no14 = "第14名測試"
+no15 = "第15名測試"
+no16 = "第16名測試"
+no17 = "第17名測試"
+no18 = "第18名測試"
+no19 = "第19名測試"
+no20 = "第20名測試"
+no21 = "第21名測試"
++++
+