Files
codey.lol/src/pages/api/link-preview.js

280 lines
8.6 KiB
JavaScript
Raw Normal View History

/**
* Server-side link preview API endpoint (Node.js / Astro)
* Uses linkedom for reliable HTML parsing and automatic entity decoding
* Returns signed proxy URLs for images from untrusted domains
*/
import {
checkRateLimit,
recordRequest,
getCookieId,
generateNonce,
createNonceCookie,
} from '../../utils/rateLimit.js';
import { signImageUrl } from './image-proxy.js';
import { parseHTML } from 'linkedom';
/**
 * Exact hostnames whose URLs may be handed to the client unmodified.
 * Matching is by full hostname (see the `Set#has` lookup in isTrustedDomain),
 * so every subdomain that should qualify has to be listed explicitly.
 */
const TRUSTED_DOMAINS = new Set([
  // YouTube
  'youtube.com',
  'www.youtube.com',
  'youtu.be',
  'img.youtube.com',
  'i.ytimg.com',
  // Instagram
  'instagram.com',
  'www.instagram.com',
  // Twitter / X
  'twitter.com',
  'x.com',
  'www.twitter.com',
  'pbs.twimg.com',
  'abs.twimg.com',
  // Twitch
  'twitch.tv',
  'www.twitch.tv',
  'clips.twitch.tv',
  // Spotify
  'spotify.com',
  'open.spotify.com',
  // SoundCloud
  'soundcloud.com',
  'www.soundcloud.com',
  // Vimeo
  'vimeo.com',
  'www.vimeo.com',
  // Imgur
  'imgur.com',
  'i.imgur.com',
  // GIF hosts
  'giphy.com',
  'media.giphy.com',
  'tenor.com',
  'media.tenor.com',
  'gfycat.com',
  // Reddit
  'reddit.com',
  'www.reddit.com',
  'v.redd.it',
  'i.redd.it',
  'preview.redd.it',
  // GitHub
  'github.com',
  'gist.github.com',
  'raw.githubusercontent.com',
  'avatars.githubusercontent.com',
  'user-images.githubusercontent.com',
  // Code playgrounds
  'codepen.io',
  'codesandbox.io',
  // Clip hosts
  'streamable.com',
  'medal.tv',
  // Discord
  'discord.com',
  'cdn.discordapp.com',
  'media.discordapp.net',
  // Stock / placeholder images
  'picsum.photos',
  'images.unsplash.com',
]);
/**
 * Report whether a URL's hostname is listed in TRUSTED_DOMAINS.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {boolean} `true` only when `url` parses and its hostname is an
 *   exact member of the set; `false` for unparseable input.
 */
function isTrustedDomain(url) {
  let hostname;
  try {
    ({ hostname } = new URL(url));
  } catch {
    // Anything the URL parser rejects is treated as untrusted.
    return false;
  }
  return TRUSTED_DOMAINS.has(hostname);
}
/**
 * Turn an image URL into one that is safe to hand to the client.
 *
 * @param {string|null|undefined} imageUrl - Image URL taken from the previewed page.
 * @returns {Promise<string|null>} `null` for a falsy input, the URL itself when
 *   its host is trusted, otherwise a same-origin `/api/image-proxy` URL carrying
 *   the encoded target and the signature returned by `signImageUrl`.
 */
async function getSafeImageUrl(imageUrl) {
  if (!imageUrl) {
    return null;
  }
  if (isTrustedDomain(imageUrl)) {
    return imageUrl;
  }

  // Sign first, then encode, matching the original evaluation order.
  const sig = await signImageUrl(imageUrl);
  const encodedTarget = encodeURIComponent(imageUrl);
  return ['/api/image-proxy?url=', encodedTarget, '&sig=', sig].join('');
}
/**
 * Extract link-preview metadata (Open Graph, Twitter card and plain HTML
 * fallbacks) from an HTML document.
 *
 * @param {string} html - HTML source (typically just the document head).
 * @param {string} url - Absolute URL the HTML was fetched from. It is echoed
 *   back as `meta.url`, supplies the `siteName` fallback and is the base for
 *   resolving a relative image URL. Must be parseable by `new URL` when the
 *   page has no `og:site_name`.
 * @returns {{url: string, title: ?string, description: ?string, image: ?string,
 *   siteName: ?string, type: ?string, video: ?string, themeColor: ?string}}
 *   Every field that the page does not provide is `null`.
 */
function parseMetaTags(html, url) {
  // A Map (rather than an object literal) so names such as "constructor" or
  // "toString" cannot resolve to inherited Object.prototype members.
  const namedEntities = new Map([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
  ]);

  // Decode numeric and the basic named character references; anything
  // unrecognised is left exactly as written. Non-string input yields null so
  // absent fields keep the null default instead of becoming undefined.
  // NOTE(review): the file header says linkedom already decodes entities, so
  // this pass presumably targets double-encoded markup — confirm it is wanted.
  const decode = (str) => {
    if (typeof str !== 'string') return null;
    return str.replace(/&(#(?:x[0-9a-fA-F]+|\d+)|[a-zA-Z]+);/g, (match, entity) => {
      if (entity[0] !== '#') {
        return namedEntities.get(entity) ?? match;
      }
      const codePoint = entity[1] === 'x'
        ? Number.parseInt(entity.slice(2), 16)
        : Number.parseInt(entity.slice(1), 10);
      // fromCodePoint handles astral characters (emoji etc.) but throws above
      // U+10FFFF, so out-of-range references are left untouched.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    });
  };

  const { document } = parseHTML(html);

  // `content` attribute of the first element matching `selector`; empty or
  // missing values become null so `||` chains fall through to the next source.
  const content = (selector) =>
    document.querySelector(selector)?.getAttribute('content') || null;

  const meta = {
    url,
    title: decode(
      content('meta[property="og:title"]') ||
      content('meta[name="twitter:title"]') ||
      document.querySelector('title')?.textContent ||
      null
    ),
    description: decode(
      content('meta[property="og:description"]') ||
      content('meta[name="twitter:description"]') ||
      content('meta[name="description"]')
    ),
    image: decode(
      content('meta[property="og:image"]') ||
      content('meta[name="twitter:image"]')
    ),
    siteName: decode(
      content('meta[property="og:site_name"]') ||
      new URL(url).hostname.replace(/^www\./, '')
    ),
    type: decode(content('meta[property="og:type"]')),
    video: decode(content('meta[property="og:video"]')),
    themeColor: decode(content('meta[name="theme-color"]')),
  };

  // Resolve relative (and protocol-relative) image URLs against the page URL
  // itself, so "images/a.png" on "/blog/post" keeps the "/blog/" directory.
  if (meta.image && !/^https?:\/\//i.test(meta.image)) {
    try {
      meta.image = decode(new URL(meta.image, url).href);
    } catch {
      meta.image = null;
    }
  }

  return meta;
}
/**
 * Build a JSON `Response` with the given status and optional extra headers.
 *
 * @param {unknown} payload - Value to serialise as the response body.
 * @param {number} status - HTTP status code.
 * @param {Record<string, string>} [extraHeaders] - Headers added after Content-Type.
 * @returns {Response}
 */
function jsonResponse(payload, status, extraHeaders = {}) {
  return new Response(JSON.stringify(payload), {
    status,
    headers: { 'Content-Type': 'application/json', ...extraHeaders },
  });
}

/**
 * Read and decode the beginning of an HTML body. Reading stops as soon as
 * `</head>` has been seen, at least `maxBytes` bytes have been consumed, or
 * the stream ends; the rest of the stream is cancelled.
 *
 * @param {ReadableStream<Uint8Array>|null} body - Upstream response body.
 * @param {number} maxBytes - Soft byte budget (checked before each read).
 * @returns {Promise<string>} Decoded text read so far ('' when there is no body).
 */
async function readHtmlHead(body, maxBytes) {
  if (!body) return '';

  const reader = body.getReader();
  // One streaming decoder for the whole body, so a multi-byte UTF-8 sequence
  // split across two chunks is decoded intact instead of becoming U+FFFD.
  const decoder = new TextDecoder();
  let html = '';
  let bytesRead = 0;
  try {
    while (bytesRead < maxBytes) {
      const { done, value } = await reader.read();
      if (done) break;
      html += decoder.decode(value, { stream: true });
      bytesRead += value.length;
      if (html.includes('</head>')) break;
    }
    // Flush whatever the decoder is still buffering.
    html += decoder.decode();
  } finally {
    // Stop the download; a failed cancel must not mask the real outcome.
    await reader.cancel().catch(() => {});
  }
  return html;
}

/**
 * GET handler for the link-preview endpoint.
 *
 * Reads the `url` query parameter, fetches that URL server-side and answers
 * with JSON describing it:
 *  - 429 when the rate limiter rejects the request;
 *  - 400 for a missing/invalid `url` or a response that is not HTML, image or video;
 *  - 502 when the upstream answers with a non-OK status;
 *  - 403 for a direct video on an untrusted host;
 *  - 500 when the fetch or the parsing throws (including the 8 s timeout);
 *  - 200 with `{ url, type, image|video, trusted }` for direct media, or the
 *    parsed metadata plus `trusted` for HTML pages.
 * Rate-limited and successful responses also set the nonce cookie when the
 * request did not carry one.
 *
 * @param {{ request: Request }} context - Route context; only `request` is used.
 * @returns {Promise<Response>}
 */
export async function GET({ request }) {
  const rateCheck = checkRateLimit(request, {
    limit: 10,
    windowMs: 1000,
    burstLimit: 50,
    burstWindowMs: 10_000,
  });

  let cookieId = getCookieId(request);
  const hadCookie = !!cookieId;
  if (!cookieId) cookieId = generateNonce();

  // Attach the nonce cookie only for clients that did not already send one.
  const withCookie = (resp) => {
    if (!hadCookie) resp.headers.set('Set-Cookie', createNonceCookie(cookieId));
    return resp;
  };

  if (!rateCheck.allowed) {
    const errorMsg = rateCheck.isFlooding
      ? { error: 'Too many requests - please slow down' }
      : { error: 'Rate limit exceeded' };
    return withCookie(jsonResponse(errorMsg, 429, { 'Retry-After': '1' }));
  }
  recordRequest(request, 1000);

  const targetUrl = new URL(request.url).searchParams.get('url');
  if (!targetUrl) {
    return jsonResponse({ error: 'Missing url parameter' }, 400);
  }

  // Only absolute http(s) URLs are accepted.
  let protocol = null;
  try {
    ({ protocol } = new URL(targetUrl));
  } catch {
    // Unparseable input: protocol stays null and is rejected just below.
  }
  if (protocol !== 'http:' && protocol !== 'https:') {
    return jsonResponse({ error: 'Invalid URL' }, 400);
  }

  const trusted = isTrustedDomain(targetUrl);
  const cacheable = { 'Cache-Control': 'public, max-age=3600' };

  // NOTE(review): targetUrl is fetched as supplied and redirects are followed;
  // nothing here filters loopback/private-network hosts (SSRF). Confirm that
  // this is enforced elsewhere (network policy, proxy) or add a check.
  const controller = new AbortController();
  // The timer stays armed until the body has been read (see `finally`), so a
  // slow-drip body is bounded by the same 8 s budget as the headers.
  const timeout = setTimeout(() => controller.abort(), 8000);
  let response;
  try {
    response = await fetch(targetUrl, {
      method: 'GET',
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; DiscordBot/2.0; +https://discordapp.com)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
      },
      signal: controller.signal,
      redirect: 'follow',
    });

    if (!response.ok) {
      return jsonResponse({ error: 'Failed to fetch URL', status: response.status }, 502);
    }

    const contentType = response.headers.get('content-type') || '';

    // Direct image: hand back a safe (trusted or proxied) URL.
    if (contentType.startsWith('image/')) {
      const safeImageUrl = await getSafeImageUrl(targetUrl);
      const result = { url: targetUrl, type: 'image', image: safeImageUrl, trusted };
      return withCookie(jsonResponse(result, 200, cacheable));
    }

    // Direct video: only allowed from trusted hosts.
    if (contentType.startsWith('video/')) {
      if (!trusted) {
        return jsonResponse({ error: 'Untrusted video source' }, 403);
      }
      const result = { url: targetUrl, type: 'video', video: targetUrl, trusted };
      return withCookie(jsonResponse(result, 200, cacheable));
    }

    if (!contentType.includes('text/html') && !contentType.includes('application/xhtml')) {
      return jsonResponse({ error: 'URL is not an HTML page', contentType }, 400);
    }

    // Roughly the first 50KB, or up to the end of <head>, is enough for meta tags.
    const html = await readHtmlHead(response.body, 50 * 1024);
    const meta = parseMetaTags(html, targetUrl);
    meta.trusted = trusted;
    if (meta.image) meta.image = await getSafeImageUrl(meta.image);

    return withCookie(jsonResponse(meta, 200, cacheable));
  } catch (err) {
    console.error('[link-preview] Error fetching URL:', err?.message ?? err);
    // Don't expose internal error details to client
    return jsonResponse({ error: 'Failed to fetch preview' }, 500);
  } finally {
    clearTimeout(timeout);
    // Paths that never read the upstream body must still release it;
    // a body locked by readHtmlHead has already been cancelled there.
    if (response?.body && !response.body.locked) {
      await response.body.cancel().catch(() => {});
    }
  }
}