/**
 * Server-side link preview API endpoint (Node.js / Astro)
 * Uses linkedom for reliable HTML parsing and automatic entity decoding
 * Returns signed proxy URLs for images from untrusted domains
 */
import {
  checkRateLimit,
  recordRequest,
  getCookieId,
  generateNonce,
  createNonceCookie,
} from '../../utils/rateLimit.ts';
import { signImageUrl } from './image-proxy.ts';
import { parseHTML } from 'linkedom';
import type { APIContext } from 'astro';

/** Metadata extracted from a page, returned to the client as the link preview. */
interface LinkPreviewMeta {
  url: string;
  title: string | null;
  description: string | null;
  image: string | null;
  siteName: string | null;
  type: string | null;
  video: string | null;
  themeColor: string | null;
  /** Not produced by the parser; the request handler attaches it before responding. */
  trusted?: boolean;
}

// Trusted domains that can be loaded client-side
const TRUSTED_DOMAINS = new Set([
  'youtube.com',
  'www.youtube.com',
  'youtu.be',
  'img.youtube.com',
  'i.ytimg.com',
  'instagram.com',
  'www.instagram.com',
  'twitter.com',
  'x.com',
  'www.twitter.com',
  'pbs.twimg.com',
  'abs.twimg.com',
  'twitch.tv',
  'www.twitch.tv',
  'clips.twitch.tv',
  'spotify.com',
  'open.spotify.com',
  'soundcloud.com',
  'www.soundcloud.com',
  'vimeo.com',
  'www.vimeo.com',
  'imgur.com',
  'i.imgur.com',
  'giphy.com',
  'media.giphy.com',
  'tenor.com',
  'media.tenor.com',
  'gfycat.com',
  'reddit.com',
  'www.reddit.com',
  'v.redd.it',
  'i.redd.it',
  'preview.redd.it',
  'github.com',
  'gist.github.com',
  'raw.githubusercontent.com',
  'avatars.githubusercontent.com',
  'user-images.githubusercontent.com',
  'codepen.io',
  'codesandbox.io',
  'streamable.com',
  'medal.tv',
  'discord.com',
  'cdn.discordapp.com',
  'media.discordapp.net',
  'picsum.photos',
  'images.unsplash.com',
]);

/**
 * Reports whether `url` parses and its hostname is an exact member of
 * TRUSTED_DOMAINS. Unparseable input is treated as untrusted.
 */
function isTrustedDomain(url: string): boolean {
  try {
    const parsed = new URL(url);
    return TRUSTED_DOMAINS.has(parsed.hostname);
  } catch {
    return false;
  }
}

/**
 * Maps an image URL to one the client may load.
 *
 * @param imageUrl Absolute image URL, or null when the page has none.
 * @returns null for a missing URL, the URL unchanged when its host is
 *   trusted, otherwise an `/api/image-proxy` URL carrying a signature.
 */
async function getSafeImageUrl(imageUrl: string | null): Promise<string | null> {
  if (!imageUrl) return null;
  if (isTrustedDomain(imageUrl)) return imageUrl;
  const signature = await signImageUrl(imageUrl);
  return `/api/image-proxy?url=${encodeURIComponent(imageUrl)}&sig=${signature}`;
}

/** The only named entities the decoder below understands. */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
]);

/** Matches `&name;`, `&#123;` and `&#x1F600;` style references. */
const ENTITY_PATTERN = /&(#(?:[xX][0-9a-fA-F]+|\d+)|[a-zA-Z]+);/g;

/**
 * Replaces numeric and the five basic named character references in `str`.
 *
 * NOTE(review): the file header says linkedom already decodes entities, so
 * this is a second pass (it un-escapes double-encoded content) — confirm
 * that this is intended.
 *
 * @returns the decoded string, or null when `str` is null/undefined so the
 *   result always satisfies the `string | null` fields of LinkPreviewMeta.
 */
function decodeEntities(str: string | null | undefined): string | null {
  if (str == null) return null;
  return str.replace(ENTITY_PATTERN, (match: string, entity: string): string => {
    if (entity.startsWith('#')) {
      const isHex = entity[1] === 'x' || entity[1] === 'X';
      const codePoint = parseInt(entity.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // fromCodePoint handles astral characters (fromCharCode truncates to
      // 16 bits) but throws above U+10FFFF, so leave those references as-is.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    }
    // A Map lookup cannot hit Object.prototype members such as "constructor".
    return NAMED_ENTITIES.get(entity) ?? match;
  });
}

/**
 * Extracts preview metadata from an HTML document.
 *
 * Each field prefers Open Graph, then Twitter card tags, then a plain HTML
 * fallback where one exists. Empty `content` attributes count as missing.
 *
 * @param html Markup of the page (may be truncated).
 * @param url Address of the page; echoed back as `meta.url`, used as the base
 *   for relative image URLs and as the site-name fallback.
 * @returns The populated metadata; fields that cannot be determined are null.
 */
function parseMetaTags(html: string, url: string): LinkPreviewMeta {
  const { document } = parseHTML(html);

  const content = (selector: string): string | null =>
    document.querySelector(selector)?.getAttribute('content') || null;

  // Parse the page URL once; an unparseable URL only disables the fallbacks
  // that depend on it instead of throwing.
  let pageUrl: URL | undefined;
  try {
    pageUrl = new URL(url);
  } catch {
    pageUrl = undefined;
  }

  const meta: LinkPreviewMeta = {
    url,
    title: decodeEntities(
      content('meta[property="og:title"]') ||
        content('meta[name="twitter:title"]') ||
        document.querySelector('title')?.textContent ||
        null
    ),
    description: decodeEntities(
      content('meta[property="og:description"]') ||
        content('meta[name="twitter:description"]') ||
        content('meta[name="description"]')
    ),
    image: decodeEntities(
      content('meta[property="og:image"]') || content('meta[name="twitter:image"]')
    ),
    siteName: decodeEntities(
      content('meta[property="og:site_name"]') ||
        (pageUrl ? pageUrl.hostname.replace(/^www\./, '') : null)
    ),
    type: decodeEntities(content('meta[property="og:type"]')),
    video: decodeEntities(content('meta[property="og:video"]')),
    themeColor: decodeEntities(content('meta[name="theme-color"]')),
  };

  // Resolve the image against the page URL itself (not just its origin) so
  // path-relative references such as "img/cover.png" land in the right
  // directory, and drop anything that is not http(s) (data:, javascript:, ...).
  if (meta.image) {
    try {
      const resolved = new URL(meta.image, pageUrl);
      meta.image =
        resolved.protocol === 'http:' || resolved.protocol === 'https:' ? resolved.href : null;
    } catch {
      meta.image = null;
    }
  }

  return meta;
}
/** Upper bound on how much of an HTML body is read while looking for meta tags. */
const MAX_HTML_BYTES = 50 * 1024;

/** Deadline for the whole upstream exchange: connecting AND reading the body. */
const FETCH_TIMEOUT_MS = 8000;

/** Meta tags live in <head>; once it is closed there is no need to read on. */
const HEAD_END_PATTERN = /<\/head\s*>/i;

/**
 * Reports whether a URL hostname is a literal loopback / private / link-local
 * address that this endpoint must not fetch on behalf of a client.
 *
 * NOTE(review): this only inspects the literal host. Public names that resolve
 * to private addresses, and redirects to such hosts, are NOT covered here.
 */
function isBlockedHost(hostname: string): boolean {
  const host = hostname.toLowerCase().replace(/\.$/, '');
  if (host === 'localhost' || host.endsWith('.localhost')) return true;

  // URL.hostname keeps the brackets around IPv6 literals.
  if (host.startsWith('[')) {
    const v6 = host.slice(1, -1);
    return (
      v6 === '::' ||
      v6 === '::1' ||
      v6.startsWith('::ffff:') || // IPv4-mapped
      /^f[cd][0-9a-f]{2}:/.test(v6) || // fc00::/7 unique local
      /^fe[89ab][0-9a-f]:/.test(v6) // fe80::/10 link-local
    );
  }

  const v4 = /^(\d+)\.(\d+)\.\d+\.\d+$/.exec(host);
  if (!v4) return false;
  const a = Number(v4[1]);
  const b = Number(v4[2]);
  return (
    a === 0 ||
    a === 10 ||
    a === 127 ||
    (a === 169 && b === 254) ||
    (a === 172 && b >= 16 && b <= 31) ||
    (a === 192 && b === 168)
  );
}

/** Releases an upstream response whose body will not be consumed. */
function discardBody(response: Response): void {
  void response.body?.cancel().catch(() => undefined);
}

/**
 * Reads at most MAX_HTML_BYTES of `response` as text, stopping early once the
 * closing head tag has been seen. Returns '' when there is no body.
 */
async function readHtmlHead(response: Response): Promise<string> {
  const body = response.body;
  if (!body) return '';

  const reader = body.getReader();
  // One streaming decoder for all chunks, so a multi-byte character split
  // across a chunk boundary is not turned into replacement characters.
  const decoder = new TextDecoder();
  let html = '';
  let bytesRead = 0;
  try {
    while (bytesRead < MAX_HTML_BYTES) {
      const { done, value } = await reader.read();
      if (done) break;
      html += decoder.decode(value, { stream: true });
      bytesRead += value.byteLength;
      if (HEAD_END_PATTERN.test(html)) break;
    }
    html += decoder.decode();
  } finally {
    void reader.cancel().catch(() => undefined);
  }
  return html;
}

/**
 * GET handler: fetches the page named by the `url` query parameter and
 * responds with JSON preview metadata.
 *
 * Responses:
 * - 429 when the rate limiter rejects the request.
 * - 400 for a missing/invalid/disallowed URL or a non-HTML, non-media target.
 * - 403 for a direct video on an untrusted host.
 * - 502 when the upstream answers with a non-2xx status.
 * - 500 when fetching or parsing fails (details are logged, not returned).
 * - 200 with the preview payload otherwise (cached for one hour).
 *
 * A nonce cookie is set on 429 and 200 responses when the request had none.
 */
export async function GET({ request }: APIContext): Promise<Response> {
  // Rate limit
  const rateCheck = checkRateLimit(request, {
    limit: 10,
    windowMs: 1000,
    burstLimit: 50,
    burstWindowMs: 10_000,
  });

  const existingCookieId = getCookieId(request);
  const hadCookie = !!existingCookieId;
  const cookieId = existingCookieId || generateNonce();

  const respond = (
    body: unknown,
    status: number,
    extraHeaders: Record<string, string> = {},
    withCookie = false
  ): Response => {
    const resp = new Response(JSON.stringify(body), {
      status,
      headers: { 'Content-Type': 'application/json', ...extraHeaders },
    });
    if (withCookie && !hadCookie) resp.headers.set('Set-Cookie', createNonceCookie(cookieId));
    return resp;
  };
  const ok = (body: unknown): Response =>
    respond(body, 200, { 'Cache-Control': 'public, max-age=3600' }, true);

  if (!rateCheck.allowed) {
    const errorMsg = rateCheck.isFlooding
      ? { error: 'Too many requests - please slow down' }
      : { error: 'Rate limit exceeded' };
    return respond(errorMsg, 429, { 'Retry-After': '1' }, true);
  }

  recordRequest(request, 1000);

  const targetUrl = new URL(request.url).searchParams.get('url');
  if (!targetUrl) {
    return respond({ error: 'Missing url parameter' }, 400);
  }

  // Validate URL: must parse, be http(s), and not point at an internal host.
  try {
    const parsedUrl = new URL(targetUrl);
    if (!['http:', 'https:'].includes(parsedUrl.protocol)) throw new Error();
    if (isBlockedHost(parsedUrl.hostname)) throw new Error();
  } catch {
    return respond({ error: 'Invalid URL' }, 400);
  }

  const trusted = isTrustedDomain(targetUrl);

  const controller = new AbortController();
  // The timer stays armed until the finally block so a slow body cannot hang
  // the request after the headers have arrived.
  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  try {
    const response = await fetch(targetUrl, {
      method: 'GET',
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; DiscordBot/2.0; +https://discordapp.com)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
      },
      signal: controller.signal,
      redirect: 'follow',
    });

    if (!response.ok) {
      discardBody(response);
      return respond({ error: 'Failed to fetch URL', status: response.status }, 502);
    }

    const contentType = response.headers.get('content-type') || '';

    // Handle direct image
    if (contentType.startsWith('image/')) {
      discardBody(response);
      const safeImageUrl = await getSafeImageUrl(targetUrl);
      return ok({ url: targetUrl, type: 'image', image: safeImageUrl, trusted });
    }

    // Handle direct video
    if (contentType.startsWith('video/')) {
      discardBody(response);
      if (!trusted) {
        return respond({ error: 'Untrusted video source' }, 403);
      }
      return ok({ url: targetUrl, type: 'video', video: targetUrl, trusted });
    }

    if (!contentType.includes('text/html') && !contentType.includes('application/xhtml')) {
      discardBody(response);
      return respond({ error: 'URL is not an HTML page', contentType }, 400);
    }

    const html = await readHtmlHead(response);
    const meta = parseMetaTags(html, targetUrl);

    // Convert image to safe URL
    if (meta.image) meta.image = await getSafeImageUrl(meta.image);

    return ok({ ...meta, trusted });
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    console.error('[link-preview] Error fetching URL:', message);
    // Don't expose internal error details to client
    return respond({ error: 'Failed to fetch preview' }, 500);
  } finally {
    clearTimeout(timeout);
  }
}