yt-playlist-features/src/content/extractor.ts

224 lines
6.1 KiB
TypeScript

import type {
PlaylistData,
PlaylistMetadata,
PlaylistVideo,
Thumbnail,
} from "../types/playlist";
/* eslint-disable @typescript-eslint/no-explicit-any */
/** Value returned by `parseInitialData` on success: the playlist metadata plus the parsed videos. */
export interface InitialParseResult {
/** Playlist-level metadata. */
metadata: PlaylistMetadata;
/** Videos parsed from the playlist's video list. */
videos: PlaylistVideo[];
}
/**
 * Parses a raw playlist page data object into metadata and videos.
 *
 * @param raw - The untyped page data object to read from.
 * @returns The parsed metadata and videos, or `null` when the metadata cannot
 *   be extracted, when the video list cannot be located, or when parsing
 *   throws (the error is logged to the console).
 */
export function parseInitialData(raw: any): InitialParseResult | null {
  try {
    const metadata = extractMetadata(raw);
    // Only look for the video list once the metadata is known to be usable.
    const listContents = metadata ? findVideoListContents(raw) : null;
    if (!metadata || !listContents) return null;

    return { metadata, videos: parseVideoList(listContents).videos };
  } catch (error) {
    console.error("[yt-playlist-features] Failed to parse playlist data:", error);
    return null;
  }
}
/**
 * Wraps already-parsed metadata and videos into a `PlaylistData` record,
 * stamping it with the current time and the number of videos supplied.
 *
 * @param metadata - Playlist-level metadata to embed as-is.
 * @param videos - Videos to embed as-is; their count becomes `extractedCount`.
 * @param isComplete - Stored verbatim on the result.
 * @returns The assembled record, with `extractedAt` set to the current time as
 *   an ISO 8601 string.
 */
export function buildPlaylistData(
  metadata: PlaylistMetadata,
  videos: PlaylistVideo[],
  isComplete: boolean,
): PlaylistData {
  const extractedAt = new Date().toISOString();
  const extractedCount = videos.length;
  return { metadata, videos, extractedAt, isComplete, extractedCount };
}
// --- internal helpers ---
/**
 * Parses every entry of a video list that carries a `playlistVideoRenderer`.
 *
 * Entries without that renderer, and renderers that `parseVideo` rejects, are
 * skipped.
 *
 * @param contents - The entries of the playlist's video list.
 * @returns An object holding the successfully parsed videos, in input order.
 */
function parseVideoList(contents: any[]): {
  videos: PlaylistVideo[];
} {
  const collected: PlaylistVideo[] = [];

  for (const entry of contents) {
    const renderer = entry.playlistVideoRenderer;
    if (!renderer) continue;

    const parsed = parseVideo(renderer);
    if (parsed) collected.push(parsed);
  }

  return { videos: collected };
}
/**
 * Builds the playlist-level metadata from the raw page data.
 *
 * @param raw - The untyped page data object to read from.
 * @returns The metadata, or `null` when no playlist ID can be determined.
 *   Missing text fields default to `""` (or `null` for the view-count and
 *   last-updated texts); `totalDurationText` is always `null` and `privacy`
 *   is always `"unknown"`.
 */
function extractMetadata(raw: any): PlaylistMetadata | null {
  const playlistId = extractPlaylistId(raw);
  if (!playlistId) return null;

  // Sidebar entry 0 is read as the primary info, entry 1 as the secondary info.
  const sidebar = raw?.sidebar?.playlistSidebarRenderer?.items ?? [];
  const primary = sidebar[0]?.playlistSidebarPrimaryInfoRenderer;
  const secondary = sidebar[1]?.playlistSidebarSecondaryInfoRenderer;
  const headerVM = raw?.header?.pageHeaderRenderer?.content?.pageHeaderViewModel;

  // Stats are read positionally: [video count, view count, last updated].
  const statEntries = primary?.stats ?? [];
  const videoCount = parseVideoCount(statEntries[0]);
  const viewCountText = extractText(statEntries[1]) || null;
  const lastUpdatedText = extractText(statEntries[2]) || null;

  // Prefer the page-header description, then the sidebar's, then "".
  const headerDescription = extractText(
    headerVM?.description?.descriptionPreviewViewModel?.description,
  );
  const description =
    headerDescription || extractText(primary?.description) || "";

  // The owner is taken from the first run of the owner renderer's title.
  const ownerLink = secondary?.videoOwner?.videoOwnerRenderer?.title?.runs?.[0];
  const ownerNav = ownerLink?.navigationEndpoint;

  return {
    playlistId,
    title: raw?.metadata?.playlistMetadataRenderer?.title ?? "",
    description,
    videoCount,
    totalDurationText: null,
    viewCountText,
    lastUpdatedText,
    thumbnails: extractPlaylistThumbnails(headerVM, primary),
    owner: {
      name: ownerLink?.text ?? "",
      channelId: ownerNav?.browseEndpoint?.browseId ?? "",
      url: ownerNav?.commandMetadata?.webCommandMetadata?.url ?? "",
    },
    privacy: "unknown",
  };
}
/**
 * Determines the playlist ID for the page described by `raw`.
 *
 * Sources are tried in order, and the first `list=` query parameter found wins:
 *   1. the microformat's canonical URL,
 *   2. the playlist metadata's Android app-indexing link,
 *   3. the `list` query parameter of the current page URL.
 *
 * @param raw - The untyped page data object to read from.
 * @returns The playlist ID, or `null` when none of the sources provides one
 *   (including when there is no `window` to read the current URL from).
 */
function extractPlaylistId(raw: any): string | null {
  // Stop at `&` and `#` so a trailing URL fragment is never captured as part
  // of the ID.
  const listParam = /[?&]list=([^&#]+)/;

  const candidates = [
    raw?.microformat?.microformatDataRenderer?.urlCanonical,
    raw?.metadata?.playlistMetadataRenderer?.androidAppindexingLink,
  ];
  for (const candidate of candidates) {
    // Ignore anything that is not a string rather than throwing on it.
    if (typeof candidate !== "string") continue;
    const match = listParam.exec(candidate);
    if (match) return match[1];
  }

  // Without a `window` there is no current URL to fall back to.
  if (typeof window === "undefined") return null;
  return new URL(window.location.href).searchParams.get("list");
}
/**
 * Locates the playlist's video list inside the raw page data.
 *
 * Only the first tab, its first section, and that section's first item are
 * inspected.
 *
 * @param raw - The untyped page data object to read from.
 * @returns The `contents` of the `playlistVideoListRenderer` found there, or
 *   `null` when any level along that path is missing or empty.
 */
function findVideoListContents(raw: any): any[] | null {
  // First element of a list-like value, or undefined when it is missing/empty.
  const head = (list: any): any => (list?.length ? list[0] : undefined);

  const firstTab = head(raw?.contents?.twoColumnBrowseResultsRenderer?.tabs);
  const firstSection = head(
    firstTab?.tabRenderer?.content?.sectionListRenderer?.contents,
  );
  const firstItem = head(firstSection?.itemSectionRenderer?.contents);

  return firstItem?.playlistVideoListRenderer?.contents ?? null;
}
/**
 * Converts one `playlistVideoRenderer` object into a `PlaylistVideo`.
 *
 * Numeric fields are parsed defensively so that `NaN` never reaches the
 * result: an unparseable `lengthSeconds` becomes `null`, and an unparseable
 * index becomes `0`.
 *
 * @param renderer - The untyped `playlistVideoRenderer` payload.
 * @returns The parsed video, or `null` when the renderer has no `videoId`.
 *   `viewCountText`, `publishedAt`, `category`, `addedBy` and `voteCount` are
 *   always `null` here.
 */
function parseVideo(renderer: any): PlaylistVideo | null {
  const videoId = renderer.videoId;
  if (!videoId) return null;

  // parseInt yields NaN for non-numeric input; report that as null instead.
  const toInt = (value: any): number | null => {
    const parsed = parseInt(value, 10);
    return Number.isNaN(parsed) ? null : parsed;
  };

  // The channel is taken from the first run of the short byline.
  const bylineRun = renderer.shortBylineText?.runs?.[0];
  const bylineNav = bylineRun?.navigationEndpoint;

  return {
    videoId,
    title: extractText(renderer.title),
    // Missing or unparseable index text falls back to 0.
    index: toInt(extractText(renderer.index) || "0") ?? 0,
    // Absent, empty, or unparseable lengthSeconds is reported as null.
    durationSeconds: renderer.lengthSeconds
      ? toInt(renderer.lengthSeconds)
      : null,
    durationText: extractText(renderer.lengthText) || null,
    thumbnails: renderer.thumbnail?.thumbnails ?? [],
    channel: {
      name: bylineRun?.text ?? "",
      channelId: bylineNav?.browseEndpoint?.browseId ?? "",
      url: bylineNav?.commandMetadata?.webCommandMetadata?.url ?? "",
    },
    // Only an explicit `false` marks the video as unplayable.
    isPlayable: renderer.isPlayable !== false,
    // `b?.` tolerates null badge entries instead of aborting the whole parse.
    isLive:
      renderer.badges?.some(
        (b: any) =>
          b?.metadataBadgeRenderer?.style === "BADGE_STYLE_TYPE_LIVE_NOW",
      ) ?? false,
    viewCountText: null,
    publishedAt: null,
    category: null,
    addedBy: null,
    voteCount: null,
  };
}
/**
 * Flattens a text-bearing value into a plain string.
 *
 * Accepted shapes, checked in this order: a string (returned as-is), an
 * object with a truthy `simpleText`, an object with `runs` (the `text` of
 * each run is concatenated), or an object with `content` (processed
 * recursively).
 *
 * @param textObj - The value to flatten; may be anything, including nullish.
 * @returns The extracted text, or `""` when no supported shape matches.
 */
function extractText(textObj: any): string {
  if (typeof textObj === "string") return textObj;
  if (!textObj) return "";

  if (textObj.simpleText) return textObj.simpleText;

  const segments = textObj.runs;
  if (segments) {
    const pieces = segments.map((segment: any) => segment.text);
    return pieces.join("");
  }

  const nested = textObj.content;
  return nested ? extractText(nested) : "";
}
/**
 * Picks the playlist's thumbnails from the page header or the sidebar.
 *
 * The header's hero image sources win when present and non-empty; each source
 * is normalised to `{ url, width, height }` with `""` / `0` defaults.
 * Otherwise the sidebar's video thumbnail list is returned as-is, then its
 * custom thumbnail list, then an empty array.
 *
 * @param pageHeaderVM - The page header view model, if any.
 * @param primaryInfo - The sidebar's primary info renderer, if any.
 * @returns The thumbnails from the first source that provides them.
 */
function extractPlaylistThumbnails(
  pageHeaderVM: any,
  primaryInfo: any,
): Thumbnail[] {
  const heroSources =
    pageHeaderVM?.heroImage?.contentPreviewImageViewModel?.image?.sources;
  if (heroSources?.length) {
    const normalize = (source: any) => ({
      url: source.url ?? "",
      width: source.width ?? 0,
      height: source.height ?? 0,
    });
    return heroSources.map(normalize);
  }

  const sidebarRenderer = primaryInfo?.thumbnailRenderer;

  const videoThumbs =
    sidebarRenderer?.playlistVideoThumbnailRenderer?.thumbnail?.thumbnails;
  if (videoThumbs != null) return videoThumbs;

  const customThumbs =
    sidebarRenderer?.playlistCustomThumbnailRenderer?.thumbnail?.thumbnails;
  if (customThumbs != null) return customThumbs;

  return [];
}
/**
 * Reads the video count out of a stats text value.
 *
 * Commas are removed from the extracted text, and the first run of digits
 * that remains is parsed as a base-10 integer.
 *
 * @param textObj - The text-bearing value holding the count.
 * @returns The parsed count, or `0` when the text contains no digits.
 */
function parseVideoCount(textObj: any): number {
  const withoutCommas = extractText(textObj).replace(/,/g, "");
  const digits = /(\d+)/.exec(withoutCommas);
  if (digits === null) return 0;
  return parseInt(digits[1], 10);
}