# ming / ming_TOOLS
import json  # for decoding a JSON response
import os  # for reading the API key from the environment
import urllib.parse  # for urlencode
import urllib.request  # for urlopen
#
def _youtube_api_get(resource, params):
    """Call a YouTube Data API v3 endpoint and return the decoded JSON body.

    Args:
        resource: Path segment after ``/youtube/v3/`` (e.g. ``'channels'``).
        params: Dict of query parameters; values are URL-encoded here so
            that identifiers and keys are always escaped correctly.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
    """
    url = ('https://www.googleapis.com/youtube/v3/' + resource
           + '?' + urllib.parse.urlencode(params))
    # The context manager closes the connection even if decoding fails.
    with urllib.request.urlopen(url) as response:
        return json.load(response)


def get_infoString(ChannelIdentifier):
    """Collect per-video statistics for one YouTube channel.

    Makes one ``channels`` request (subscriber count), one ``search``
    request (video ids of the channel) and then one ``videos`` request per
    video id, showing a tqdm progress bar over the video ids.

    Args:
        ChannelIdentifier: The YouTube channel id to query.

    Returns:
        A list with one entry per video, each entry being the list
        ``[channelTitle, subscribes, description, title, publishedAt,
        viewCount, likeCount, favoriteCount, commentCount]``. Counters are
        passed through as returned by the API; a counter missing from the
        response is reported as ``0``. Returns an empty list when the
        channel lookup yields no items.

    Raises:
        urllib.error.URLError: If any of the HTTP requests fails.
    """
    # NOTE(security): this key is committed to source control. It should be
    # revoked and supplied through the YOUTUBE_API_KEY environment variable;
    # the literal is kept only as a backward-compatible fallback.
    # How to obtain a key: https://www.youtube.com/watch?v=JbWnRhHfTDA
    API_KEY = os.environ.get('YOUTUBE_API_KEY',
                             'AIzaSyDuwkgFVRLOa3gkBU4aeDjVBuogLQ1ZZXE')

    channel_info = _youtube_api_get('channels', {
        'part': 'statistics',
        'id': ChannelIdentifier,
        'key': API_KEY,
    })
    channel_items = channel_info.get('items')
    if not channel_items:
        # Nothing to report for a channel the API does not return.
        return []
    subscribes = channel_items[0]['statistics'].get('subscriberCount', 0)

    # search.list documents maxResults as 0-50, so request the maximum
    # allowed page size rather than an out-of-range value.
    search_result = _youtube_api_get('search', {
        'part': 'snippet',
        'channelId': ChannelIdentifier,
        'maxResults': 50,
        'type': 'video',
        'key': API_KEY,
    })
    video_ids = [
        item['id']['videoId']
        for item in search_result.get('items', [])
        if item['id']['kind'] == 'youtube#video'
    ]

    # Second phase: fetch snippet and statistics for every video id found.
    vlist = []
    for video_id in tqdm(video_ids, leave=False):
        details = _youtube_api_get('videos', {
            'part': 'snippet,contentDetails,statistics',
            'id': video_id,
            'key': API_KEY,
        })
        for video in details.get('items', []):
            if video['kind'] != 'youtube#video':
                continue
            snippet = video['snippet']
            # Individual counters are not always present in the response;
            # default each of them to 0 instead of swallowing all errors.
            stats = video.get('statistics', {})
            vlist.append([
                snippet['channelTitle'],
                subscribes,
                snippet['description'],
                snippet['title'],
                snippet['publishedAt'],
                stats.get('viewCount', 0),
                stats.get('likeCount', 0),
                stats.get('favoriteCount', 0),
                stats.get('commentCount', 0),
            ])
    return vlist


import csv

from youtubesearchpython import *
from tqdm import tqdm
import time
pages = 100
max_channels = 8 * pages

# Multipliers for the abbreviated counts found in the 'subscribers' label.
_SUBSCRIBER_SUFFIXES = {'K': 1000, 'M': 1000000, 'B': 1000000000}


def _parse_subscriber_count(text):
    """Convert a label such as '1.25K subscribers' to an int.

    Handles plain numbers ('523'), thousands separators ('1,234') and the
    K/M/B suffixes with or without a decimal part ('2M', '1.5M', '1.25K').

    Returns:
        The subscriber count as an int, or None when ``text`` is empty or
        does not start with a recognisable number.
    """
    if not text:
        return None
    parts = text.split()
    if not parts:
        return None
    token = parts[0].replace(',', '').upper()
    multiplier = _SUBSCRIBER_SUFFIXES.get(token[-1], 1)
    if multiplier != 1:
        token = token[:-1]
    try:
        # round() absorbs float noise such as 1.1 * 1000000.
        return int(round(float(token) * multiplier))
    except (ValueError, OverflowError):
        return None


search = ChannelsSearch('科技', limit = 8)
c_list = []
seen_channel_ids = set()  # a channel can show up on more than one page
print('Filtering Channels................')
for p in tqdm(range(pages)):
    for channel in search.result()['result']:
        subscriber_count = _parse_subscriber_count(channel['subscribers'])
        # Skip channels with a hidden/unparseable count or too few subscribers.
        if subscriber_count is None or subscriber_count <= 500:
            continue
        if channel['id'] in seen_channel_ids:
            continue
        seen_channel_ids.add(channel['id'])
        c_list.append(channel)
    search.next()


# Export one CSV row per video of every selected channel.
with open('youtubeReport.csv', 'w', newline='',encoding='UTF-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['channelTitle','subscribes', 'description', 'videoTitle','publishedAt','viewCount','likeCount','favoriteCount','commentCount'])

    for ch in tqdm(c_list):
        channel_id = ch['id']
        vlist = get_infoString(channel_id)
        # Each entry of vlist already has the nine fields in header order;
        # write them all so the commentCount column is not left empty.
        writer.writerows(vlist)