[telegram] Add extractor (#2922)

Closes #2910

Authored by: hatienl0i261299
This commit is contained in:
Ha Tien Loi
2022-03-04 18:18:46 +07:00
committed by GitHub
parent ded9f32667
commit 5bcccbfec3
2 changed files with 38 additions and 0 deletions

View File

@@ -1593,6 +1593,7 @@ from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
from .telegram import TelegramEmbedIE
from .telemb import TeleMBIE
from .telemundo import TelemundoIE
from .telequebec import (

View File

@@ -0,0 +1,37 @@
from .common import InfoExtractor
class TelegramEmbedIE(InfoExtractor):
    """Extract the video attached to a Telegram post at ``t.me/<channel>/<post id>``.

    Two pages are fetched for every post:

    * the post page itself, whose ``og:``/``twitter:`` meta tags supply the
      title and description;
    * the same page requested with ``embed=1``, whose markup supplies the
      ``<video>`` source and the thumbnail.
    """
    IE_NAME = 'telegram:embed'
    _VALID_URL = r'https?://t\.me/(?P<channel_name>[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://t.me/europa_press/613',
        'info_dict': {
            'id': '613',
            'ext': 'mp4',
            'title': 'Europa Press',
            # The expected value is a digest of the description; the test
            # harness only hash-compares values that carry the "md5:" prefix.
            'description': 'md5:6ce2d7e8d56eda16d80607b23db7b252',
            'thumbnail': r're:^https?:\/\/cdn.*?telesco\.pe\/file\/\w+',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the post at *url*.

        The title and the ``<video>`` source are mandatory, so extraction
        fails if either is missing. The description and thumbnail are
        optional and become ``None`` when they cannot be found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Pass the parameter via ``query`` instead of appending '?embed=1' by
        # hand: _VALID_URL is not anchored at the end, so the matched URL may
        # already carry a query string and naive concatenation would corrupt it.
        webpage_embed = self._download_webpage(
            url, video_id, query={'embed': 1}, note='Downloading embed page')

        formats = [{
            # The source may be protocol-relative ('//host/...'); normalise it.
            'url': self._proto_relative_url(self._search_regex(
                r'<video[^>]+src="([^"]+)"', webpage_embed, 'source')),
            'ext': 'mp4',
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._html_search_meta(
                ['og:title', 'twitter:title'], webpage, fatal=True),
            # Optional metadata must never abort an otherwise successful extraction.
            'description': self._html_search_meta(
                ['og:description', 'twitter:description'], webpage, default=None),
            'thumbnail': self._search_regex(
                r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
                webpage_embed, 'thumbnail', default=None),
            'formats': formats,
        }