302 lines
9.3 KiB
TypeScript
302 lines
9.3 KiB
TypeScript
/**
 * Server-side link preview API endpoint (Node.js / Astro).
 * Uses linkedom for reliable HTML parsing and automatic entity decoding.
 * Returns signed proxy URLs for images from untrusted domains.
 */
|
|
|
|
import {
|
|
checkRateLimit,
|
|
recordRequest,
|
|
getCookieId,
|
|
generateNonce,
|
|
createNonceCookie,
|
|
} from '../../utils/rateLimit.ts';
|
|
import { signImageUrl } from './image-proxy.ts';
|
|
import { parseHTML } from 'linkedom';
|
|
import type { APIContext } from 'astro';
|
|
|
|
/**
 * Metadata extracted from an HTML page and returned as the preview payload.
 */
interface LinkPreviewMeta {
  /** The URL the preview was built for. */
  url: string;

  /** Taken from `og:title`, then `twitter:title`, then the `<title>` element. */
  title: string | null;

  /** Taken from `og:description`, then `twitter:description`, then `meta[name="description"]`. */
  description: string | null;

  /** Taken from `og:image`, then `twitter:image`. */
  image: string | null;

  /** Taken from `og:site_name`, falling back to the URL's hostname without a leading `www.`. */
  siteName: string | null;

  /** Value of `og:type`. */
  type: string | null;

  /** Value of `og:video`. */
  video: string | null;

  /** Value of `meta[name="theme-color"]`. */
  themeColor: string | null;

  /** Whether the URL's hostname is in the trusted-domain list. */
  trusted: boolean;
}
|
|
|
|
/**
 * Exact hostnames whose resources may be loaded directly by the client.
 * Matching is by full hostname; subdomains must be listed explicitly.
 */
const TRUSTED_DOMAINS = new Set<string>([
  // YouTube
  'youtube.com',
  'www.youtube.com',
  'youtu.be',
  'img.youtube.com',
  'i.ytimg.com',
  // Instagram
  'instagram.com',
  'www.instagram.com',
  // Twitter / X
  'twitter.com',
  'x.com',
  'www.twitter.com',
  'pbs.twimg.com',
  'abs.twimg.com',
  // Twitch
  'twitch.tv',
  'www.twitch.tv',
  'clips.twitch.tv',
  // Spotify
  'spotify.com',
  'open.spotify.com',
  // SoundCloud
  'soundcloud.com',
  'www.soundcloud.com',
  // Vimeo
  'vimeo.com',
  'www.vimeo.com',
  // Imgur
  'imgur.com',
  'i.imgur.com',
  // Giphy
  'giphy.com',
  'media.giphy.com',
  // Tenor
  'tenor.com',
  'media.tenor.com',
  // Gfycat
  'gfycat.com',
  // Reddit
  'reddit.com',
  'www.reddit.com',
  'v.redd.it',
  'i.redd.it',
  'preview.redd.it',
  // GitHub
  'github.com',
  'gist.github.com',
  'raw.githubusercontent.com',
  'avatars.githubusercontent.com',
  'user-images.githubusercontent.com',
  // Code playgrounds
  'codepen.io',
  'codesandbox.io',
  // Clip hosts
  'streamable.com',
  'medal.tv',
  // Discord
  'discord.com',
  'cdn.discordapp.com',
  'media.discordapp.net',
  // Stock / placeholder images
  'picsum.photos',
  'images.unsplash.com',
]);
|
|
|
|
/**
 * Reports whether a URL's hostname is listed in `TRUSTED_DOMAINS`.
 *
 * @param url - Absolute URL string to inspect.
 * @returns `true` on an exact hostname match; `false` otherwise, including
 *   when `url` cannot be parsed.
 */
function isTrustedDomain(url: string): boolean {
  let hostname: string;
  try {
    ({ hostname } = new URL(url));
  } catch {
    // Unparseable input is never trusted.
    return false;
  }
  return TRUSTED_DOMAINS.has(hostname);
}
|
|
|
|
/**
 * Produces an image URL that is safe to hand to the client.
 *
 * @param imageUrl - Image URL found for the preview, or `null`.
 * @returns `null` for empty input, the URL unchanged when its host is
 *   trusted, otherwise a `/api/image-proxy` URL carrying the encoded
 *   original URL and the signature from `signImageUrl`.
 */
async function getSafeImageUrl(imageUrl: string | null): Promise<string | null> {
  if (!imageUrl) {
    return null;
  }

  if (isTrustedDomain(imageUrl)) {
    return imageUrl;
  }

  // Untrusted host: route the image through the signed proxy endpoint.
  const sig = await signImageUrl(imageUrl);
  const encodedUrl = encodeURIComponent(imageUrl);
  return `/api/image-proxy?url=${encodedUrl}&sig=${sig}`;
}
|
|
|
|
/** Named character references understood by `decodeEntities`. */
const NAMED_ENTITIES = new Map<string, string>([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
]);

/** Largest valid Unicode code point; anything above cannot be decoded. */
const MAX_CODE_POINT = 0x10ffff;

/**
 * Decodes numeric (`&#NN;`, `&#xHH;`) and a small set of named HTML entities.
 *
 * Unknown names and out-of-range numeric references are left untouched.
 *
 * @param text - Raw text, or `null` when the value was absent.
 * @returns The decoded text, or `null` when `text` is `null`.
 */
function decodeEntities(text: string | null): string | null {
  if (text === null) return null;

  return text.replace(
    /&(#(?:x[0-9a-fA-F]+|\d+)|[a-zA-Z]+);/g,
    (match: string, entity: string): string => {
      if (entity.startsWith('#')) {
        const codePoint =
          entity[1] === 'x' ? parseInt(entity.slice(2), 16) : parseInt(entity.slice(1), 10);
        // fromCodePoint handles astral characters (e.g. emoji) that
        // fromCharCode would truncate; it throws above MAX_CODE_POINT, so
        // such references are kept verbatim.
        return codePoint <= MAX_CODE_POINT ? String.fromCodePoint(codePoint) : match;
      }
      // A Map avoids accidental hits on Object.prototype members such as
      // `&constructor;` or `&toString;`.
      return NAMED_ENTITIES.get(entity) ?? match;
    },
  );
}

/**
 * Extracts link-preview metadata (Open Graph, Twitter card, and plain HTML
 * fallbacks) from an HTML document.
 *
 * Every field that the page does not supply is `null`, so the serialized
 * JSON always contains every key of `LinkPreviewMeta`. `trusted` is always
 * `false` here; the caller is expected to set it.
 *
 * @param html - HTML source (may be truncated after `</head>`).
 * @param url - Absolute URL of the page; used for the `siteName` fallback and
 *   to resolve a relative image URL.
 * @returns The populated metadata record.
 * @throws TypeError if `og:site_name` is missing and `url` is not a valid
 *   absolute URL.
 */
function parseMetaTags(html: string, url: string): LinkPreviewMeta {
  const { document } = parseHTML(html);

  // Reads the `content` attribute of the first element matching `selector`;
  // empty or missing values collapse to null.
  const content = (selector: string): string | null =>
    document.querySelector(selector)?.getAttribute('content') || null;

  // NOTE(review): the file header says linkedom already decodes entities, so
  // decodeEntities is a second pass — presumably to cope with double-encoded
  // pages; confirm this is intended.
  const meta: LinkPreviewMeta = {
    url,
    title: decodeEntities(
      content('meta[property="og:title"]') ||
        content('meta[name="twitter:title"]') ||
        document.querySelector('title')?.textContent ||
        null,
    ),
    description: decodeEntities(
      content('meta[property="og:description"]') ||
        content('meta[name="twitter:description"]') ||
        content('meta[name="description"]'),
    ),
    image: decodeEntities(
      content('meta[property="og:image"]') || content('meta[name="twitter:image"]'),
    ),
    siteName: decodeEntities(
      content('meta[property="og:site_name"]') || new URL(url).hostname.replace(/^www\./, ''),
    ),
    type: decodeEntities(content('meta[property="og:type"]')),
    video: decodeEntities(content('meta[property="og:video"]')),
    themeColor: decodeEntities(content('meta[name="theme-color"]')),
    trusted: false,
  };

  // Resolve relative image URLs against the page URL itself (not just its
  // origin) so that path-relative references such as `img/cover.png` on
  // `/blog/post` resolve the way a browser would.
  if (meta.image && !meta.image.startsWith('http')) {
    try {
      meta.image = new URL(meta.image, url).href;
    } catch {
      meta.image = null;
    }
  }

  return meta;
}
|
|
|
|
/** Request headers sent with the outbound page fetch. */
const PREVIEW_FETCH_HEADERS = {
  'User-Agent': 'Mozilla/5.0 (compatible; DiscordBot/2.0; +https://discordapp.com)',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.5',
};

/** Overall budget for the upstream fetch, including reading the body. */
const PREVIEW_FETCH_TIMEOUT_MS = 8000;

/** Maximum number of redirects followed before giving up. */
const PREVIEW_MAX_REDIRECTS = 10;

/** Upper bound on how much HTML is read while looking for `</head>`. */
const PREVIEW_MAX_HTML_BYTES = 50 * 1024;

/** Builds a JSON response with the given status and optional extra headers. */
function jsonResponse(
  body: unknown,
  status: number,
  extraHeaders: Record<string, string> = {},
): Response {
  return new Response(JSON.stringify(body), {
    status,
    headers: { 'Content-Type': 'application/json', ...extraHeaders },
  });
}

/** True when the URL uses the http or https scheme. */
function isHttpUrl(candidate: URL): boolean {
  return candidate.protocol === 'http:' || candidate.protocol === 'https:';
}

/**
 * True when a hostname is a loopback/private/link-local literal or a
 * local-only name. This only inspects the literal host; names that resolve
 * to internal addresses via DNS are NOT detected here.
 */
function isBlockedHost(hostname: string): boolean {
  const host = hostname.toLowerCase().replace(/\.$/, '');

  if (
    host === 'localhost' ||
    host.endsWith('.localhost') ||
    host.endsWith('.local') ||
    host.endsWith('.internal')
  ) {
    return true;
  }

  // The URL parser canonicalizes IPv4 literals (hex, octal, integer forms)
  // to dotted-decimal, so a single pattern is sufficient.
  const v4 = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
  if (v4) {
    const a = Number(v4[1]);
    const b = Number(v4[2]);
    return (
      a === 0 ||
      a === 10 ||
      a === 127 ||
      (a === 100 && b >= 64 && b <= 127) ||
      (a === 169 && b === 254) ||
      (a === 172 && b >= 16 && b <= 31) ||
      (a === 192 && b === 168) ||
      a >= 224
    );
  }

  // IPv6 literals are bracketed in URL.hostname.
  if (host.startsWith('[') && host.endsWith(']')) {
    const v6 = host.slice(1, -1);
    return (
      v6 === '::' ||
      v6 === '::1' ||
      v6.startsWith('::ffff:') || // IPv4-mapped
      /^f[cd]/.test(v6) || // unique local fc00::/7
      /^fe[89ab]/.test(v6) // link-local fe80::/10
    );
  }

  return false;
}

/** Best-effort release of an upstream body we are not going to read. */
async function discardBody(response: Response): Promise<void> {
  await response.body?.cancel().catch(() => {
    // Nothing useful to do if the stream is already closed or errored.
  });
}

/**
 * Fetches `start`, following redirects manually so that every hop is
 * re-validated against the scheme and host rules.
 *
 * @throws Error when a redirect targets a disallowed URL or the redirect
 *   limit is exceeded; fetch errors (including abort) propagate unchanged.
 */
async function fetchFollowingRedirects(start: URL, signal: AbortSignal): Promise<Response> {
  let current = start;
  for (let hop = 0; hop <= PREVIEW_MAX_REDIRECTS; hop++) {
    const response = await fetch(current, {
      method: 'GET',
      headers: PREVIEW_FETCH_HEADERS,
      signal,
      redirect: 'manual',
    });

    const location = response.headers.get('location');
    const isRedirect = response.status >= 300 && response.status < 400 && !!location;
    if (!isRedirect || !location) return response;

    await discardBody(response);
    const next = new URL(location, current);
    if (!isHttpUrl(next) || isBlockedHost(next.hostname)) {
      throw new Error('Redirect to disallowed URL');
    }
    current = next;
  }
  throw new Error('Too many redirects');
}

/** Picks a decoder for the charset in `contentType`, defaulting to UTF-8. */
function createDecoder(contentType: string): TextDecoder {
  const charset = /charset\s*=\s*["']?([^"';\s]+)/i.exec(contentType)?.[1];
  if (charset) {
    try {
      return new TextDecoder(charset);
    } catch {
      // Unknown or unsupported label: fall back to UTF-8 below.
    }
  }
  return new TextDecoder();
}

/**
 * Reads the response body until `</head>` is seen, the stream ends, or
 * `PREVIEW_MAX_HTML_BYTES` have been consumed, then cancels the stream.
 */
async function readHtmlHead(
  body: NonNullable<Response['body']>,
  contentType: string,
): Promise<string> {
  const reader = body.getReader();
  // One streaming decoder so multi-byte characters split across chunk
  // boundaries are decoded correctly.
  const decoder = createDecoder(contentType);
  let html = '';
  let bytesRead = 0;

  try {
    while (bytesRead < PREVIEW_MAX_HTML_BYTES) {
      const { done, value } = await reader.read();
      if (done) {
        html += decoder.decode();
        break;
      }
      html += decoder.decode(value, { stream: true });
      bytesRead += value.length;
      if (html.includes('</head>')) break;
    }
  } finally {
    await reader.cancel().catch(() => {
      // Stream already closed or errored; nothing to release.
    });
  }

  return html;
}

/**
 * GET handler: builds a link preview for the page given in the `url` query
 * parameter.
 *
 * Responses (all JSON):
 * - 429 when the rate limiter rejects the request.
 * - 400 when `url` is missing, unparseable, not http(s), points at a
 *   blocked (loopback/private/local) host, or the target is not HTML.
 * - 403 when the target is a video on an untrusted host.
 * - 502 when the upstream responds with a non-OK status or has no body.
 * - 500 when the fetch fails, times out, or is redirected somewhere
 *   disallowed.
 * - 200 with the preview payload for images, trusted videos, and HTML pages.
 *
 * A nonce cookie is attached to 429 and 200 responses when the request did
 * not already carry one.
 *
 * NOTE(review): 200 responses are `public`-cacheable yet may carry
 * `Set-Cookie`; confirm no shared cache in front of this route stores it.
 */
export async function GET({ request }: { request: Request }): Promise<Response> {
  // Rate limit
  const rateCheck = checkRateLimit(request, {
    limit: 10,
    windowMs: 1000,
    burstLimit: 50,
    burstWindowMs: 10_000,
  });

  const existingCookieId = getCookieId(request);
  const hadCookie = !!existingCookieId;
  const cookieId = existingCookieId || generateNonce();
  const withCookie = (resp: Response): Response => {
    if (!hadCookie) resp.headers.set('Set-Cookie', createNonceCookie(cookieId));
    return resp;
  };

  if (!rateCheck.allowed) {
    const error = rateCheck.isFlooding
      ? 'Too many requests - please slow down'
      : 'Rate limit exceeded';
    return withCookie(jsonResponse({ error }, 429, { 'Retry-After': '1' }));
  }

  recordRequest(request, 1000);

  const targetUrl = new URL(request.url).searchParams.get('url');
  if (!targetUrl) {
    return jsonResponse({ error: 'Missing url parameter' }, 400);
  }

  // Validate URL: must parse, be http(s), and not point at an internal host.
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(targetUrl);
  } catch {
    return jsonResponse({ error: 'Invalid URL' }, 400);
  }
  if (!isHttpUrl(parsedUrl) || isBlockedHost(parsedUrl.hostname)) {
    return jsonResponse({ error: 'Invalid URL' }, 400);
  }

  const trusted = isTrustedDomain(targetUrl);
  const cacheHeaders = { 'Cache-Control': 'public, max-age=3600' };

  // The timer stays armed until the handler is done with the upstream
  // response, so a stalled body read is aborted as well.
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), PREVIEW_FETCH_TIMEOUT_MS);

  try {
    const response = await fetchFollowingRedirects(parsedUrl, controller.signal);

    if (!response.ok) {
      await discardBody(response);
      return jsonResponse({ error: 'Failed to fetch URL', status: response.status }, 502);
    }

    const contentType = response.headers.get('content-type') || '';

    // Handle direct image
    if (contentType.startsWith('image/')) {
      await discardBody(response);
      const image = await getSafeImageUrl(targetUrl);
      const result = { url: targetUrl, type: 'image', image, trusted };
      return withCookie(jsonResponse(result, 200, cacheHeaders));
    }

    // Handle direct video
    if (contentType.startsWith('video/')) {
      await discardBody(response);
      if (!trusted) {
        return jsonResponse({ error: 'Untrusted video source' }, 403);
      }
      const result = { url: targetUrl, type: 'video', video: targetUrl, trusted };
      return withCookie(jsonResponse(result, 200, cacheHeaders));
    }

    if (!contentType.includes('text/html') && !contentType.includes('application/xhtml')) {
      await discardBody(response);
      return jsonResponse({ error: 'URL is not an HTML page', contentType }, 400);
    }

    if (!response.body) {
      return jsonResponse({ error: 'Empty response body' }, 502);
    }

    // Read at most the first 50KB, stopping early once </head> is seen.
    const html = await readHtmlHead(response.body, contentType);

    const meta = parseMetaTags(html, targetUrl);
    meta.trusted = trusted;

    // Convert image to safe URL
    if (meta.image) meta.image = await getSafeImageUrl(meta.image);

    return withCookie(jsonResponse(meta, 200, cacheHeaders));
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error('[link-preview] Error fetching URL:', message);
    // Don't expose internal error details to client
    return jsonResponse({ error: 'Failed to fetch preview' }, 500);
  } finally {
    clearTimeout(timeout);
  }
}
|