Unverified commit 416da574, authored by Sergey M․
Browse files

[ytsearch] Fix extraction (closes #26920)

parent 48c5663c
...@@ -3181,54 +3181,94 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor): ...@@ -3181,54 +3181,94 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
_MAX_RESULTS = float('inf') _MAX_RESULTS = float('inf')
IE_NAME = 'youtube:search' IE_NAME = 'youtube:search'
_SEARCH_KEY = 'ytsearch' _SEARCH_KEY = 'ytsearch'
_EXTRA_QUERY_ARGS = {} _SEARCH_PARAMS = None
_TESTS = [] _TESTS = []
def _get_n_results(self, query, n): def _entries(self, query, n):
"""Get a specified number of results for a query""" data = {
'context': {
videos = [] 'client': {
limit = n 'clientName': 'WEB',
'clientVersion': '2.20201021.03.00',
url_query = { }
'search_query': query.encode('utf-8'), },
'query': query,
} }
url_query.update(self._EXTRA_QUERY_ARGS) if self._SEARCH_PARAMS:
result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) data['params'] = self._SEARCH_PARAMS
total = 0
for pagenum in itertools.count(1): for page_num in itertools.count(1):
data = self._download_json( search = self._download_json(
result_url, video_id='query "%s"' % query, 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
note='Downloading page %s' % pagenum, video_id='query "%s"' % query,
errnote='Unable to download API page', note='Downloading page %s' % page_num,
query={'spf': 'navigate'}) errnote='Unable to download API page', fatal=False,
html_content = data[1]['body']['content'] data=json.dumps(data).encode('utf8'),
headers={'content-type': 'application/json'})
if 'class="search-message' in html_content: if not search:
raise ExtractorError(
'[youtube] No video results', expected=True)
new_videos = list(self._process_page(html_content))
videos += new_videos
if not new_videos or len(videos) > limit:
break break
next_link = self._html_search_regex( slr_contents = try_get(
r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next', search,
html_content, 'next link', default=None) (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
if next_link is None: lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
list)
if not slr_contents:
break break
result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link) isr_contents = try_get(
slr_contents,
lambda x: x[0]['itemSectionRenderer']['contents'],
list)
if not isr_contents:
break
for content in isr_contents:
if not isinstance(content, dict):
continue
video = content.get('videoRenderer')
if not isinstance(video, dict):
continue
video_id = video.get('videoId')
if not video_id:
continue
title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
view_count = int_or_none(self._search_regex(
r'^(\d+)', re.sub(r'\s', '', view_count_text),
'view count', default=None))
uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
total += 1
yield {
'_type': 'url_transparent',
'ie_key': YoutubeIE.ie_key(),
'id': video_id,
'url': video_id,
'title': title,
'description': description,
'duration': duration,
'view_count': view_count,
'uploader': uploader,
}
if total == n:
return
token = try_get(
slr_contents,
lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
compat_str)
if not token:
break
data['continuation'] = token
if len(videos) > n: def _get_n_results(self, query, n):
videos = videos[:n] """Get a specified number of results for a query"""
return self.playlist_result(videos, query) return self.playlist_result(self._entries(query, n), query)
class YoutubeSearchDateIE(YoutubeSearchIE): class YoutubeSearchDateIE(YoutubeSearchIE):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date' IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_SEARCH_KEY = 'ytsearchdate' _SEARCH_KEY = 'ytsearchdate'
IE_DESC = 'YouTube.com searches, newest videos first' IE_DESC = 'YouTube.com searches, newest videos first'
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'} _SEARCH_PARAMS = 'CAI%3D'
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor): class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment