"""Export per-video statistics for YouTube channels matching a search term.

Channels are discovered with ``youtubesearchpython``, filtered by their
advertised subscriber count, and every video returned by the YouTube Data
API v3 for those channels is written as one row of ``youtubeReport.csv``.
"""
import csv
import json  # for decoding the JSON responses
import os
import re
import time
import urllib.parse  # urlencode builds correctly escaped query strings
import urllib.request
from decimal import Decimal

from tqdm import tqdm
from youtubesearchpython import ChannelsSearch

# SECURITY: an API key is committed to source below. It is kept only as a
# fallback so existing runs keep working; prefer setting YOUTUBE_API_KEY in
# the environment, then revoke and remove the embedded key.
# How to obtain a key: https://www.youtube.com/watch?v=JbWnRhHfTDA
API_KEY = (os.environ.get('YOUTUBE_API_KEY')
           or 'AIzaSyDuwkgFVRLOa3gkBU4aeDjVBuogLQ1ZZXE')

API_BASE_URL = 'https://www.googleapis.com/youtube/v3/'

# search.list documents maxResults as 0..50; the previous value (150) is
# outside that range.
SEARCH_MAX_RESULTS = 50

SEARCH_TERM = '科技'  # "science/technology"
CHANNELS_PER_PAGE = 8
MIN_SUBSCRIBERS = 500  # channels must have strictly more than this

pages = 100
max_channels = CHANNELS_PER_PAGE * pages  # not referenced elsewhere in this file

CSV_HEADER = ['channelTitle', 'subscribes', 'description', 'videoTitle',
              'publishedAt', 'viewCount', 'likeCount', 'favoriteCount',
              'commentCount']

_SUFFIX_MULTIPLIERS = {'': 1, 'K': 10 ** 3, 'M': 10 ** 6, 'B': 10 ** 9}
_COUNT_PATTERN = re.compile(r'(\d+(?:\.\d+)?)([KMB]?)')


def _fetch_json(endpoint, params):
    """GET ``API_BASE_URL + endpoint`` with *params* plus the API key; return the decoded JSON."""
    query = urllib.parse.urlencode(dict(params, key=API_KEY))
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(API_BASE_URL + endpoint + '?' + query) as response:
        return json.load(response)


def _parse_subscriber_count(text):
    """Convert text such as ``'1.25K subscribers'`` to an int (here 1250).

    Returns ``None`` when *text* is empty/``None`` or its first word is not a
    number with an optional K/M/B suffix (e.g. ``'No subscribers'``).
    """
    if not text:
        return None
    words = text.split()
    if not words:
        return None
    match = _COUNT_PATTERN.fullmatch(words[0].replace(',', '').upper())
    if match is None:
        return None
    number, suffix = match.groups()
    # Decimal keeps values like 2.3M exact, where float arithmetic could
    # land just below the intended integer.
    return int(Decimal(number) * _SUFFIX_MULTIPLIERS[suffix])


def get_infoString(ChannelIdentifier):
    """Collect statistics for the videos of one YouTube channel.

    Args:
        ChannelIdentifier: the YouTube channel id to query.

    Returns:
        A list with one 9-item list per video, ordered like ``CSV_HEADER``:
        channel title, channel subscriber count, video description, video
        title, publish date, view count, like count, favorite count and
        comment count. Counts missing from the API response are reported
        as ``0``. An empty list is returned when the API returns no channel
        for the id.

    Raises:
        urllib.error.URLError: if any API request fails (``HTTPError`` is a
            subclass and is raised for non-success HTTP statuses).
    """
    channel_info = _fetch_json(
        'channels', {'part': 'statistics', 'id': ChannelIdentifier})
    channel_items = channel_info.get('items')
    if not channel_items:
        return []
    subscribes = channel_items[0].get('statistics', {}).get('subscriberCount', 0)

    # Part one: list the channel's videos (a single page of search results).
    search_result = _fetch_json('search', {
        'part': 'snippet',
        'channelId': ChannelIdentifier,
        'maxResults': SEARCH_MAX_RESULTS,
        'type': 'video',
    })
    video_ids = [
        item['id']['videoId']
        for item in search_result.get('items', [])
        if item['id']['kind'] == 'youtube#video'
    ]

    # Part two: fetch the details of each video in turn. The response shape
    # can be explored with the API explorer for youtube.videos.list.
    vlist = []
    for video_id in tqdm(video_ids, leave=False):
        details = _fetch_json('videos', {
            'part': 'snippet,contentDetails,statistics',
            'id': video_id,
        })
        for video in details.get('items', []):
            if video['kind'] != 'youtube#video':
                continue
            snippet = video['snippet']
            stats = video.get('statistics', {})
            vlist.append([
                snippet['channelTitle'],
                subscribes,
                snippet['description'],
                snippet['title'],
                snippet['publishedAt'],
                stats.get('viewCount', 0),
                stats.get('likeCount', 0),
                stats.get('favoriteCount', 0),
                stats.get('commentCount', 0),
            ])
    return vlist


def main():
    """Search for channels, keep the sufficiently large ones, and write the CSV report."""
    search = ChannelsSearch(SEARCH_TERM, limit=CHANNELS_PER_PAGE)
    c_list = []
    print('Filtering Channels................')
    for _ in tqdm(range(pages)):
        for channel in search.result()['result']:
            subscriber_count = _parse_subscriber_count(channel['subscribers'])
            # Channels with a hidden or unparseable count are skipped.
            if subscriber_count is not None and subscriber_count > MIN_SUBSCRIBERS:
                c_list.append(channel)
        search.next()

    with open('youtubeReport.csv', 'w', newline='', encoding='UTF-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(CSV_HEADER)
        for ch in tqdm(c_list):
            # Write every field of each row so commentCount (the ninth
            # column of the header) is included.
            writer.writerows(get_infoString(ch['id']))


if __name__ == '__main__':
    main()