/**
 * Server-side link preview API endpoint
 * Fetches Open Graph / meta data for URLs to prevent user IP exposure
 * Returns signed proxy URLs for images from untrusted domains
 */
import {
  checkRateLimit,
  recordRequest,
  getCookieId,
  generateNonce,
  createNonceCookie,
} from '../../utils/rateLimit.js';
import { signImageUrl } from './image-proxy.js';

// Trusted domains that can be loaded client-side (embed-safe providers)
const TRUSTED_DOMAINS = new Set([
  'youtube.com',
  'www.youtube.com',
  'youtu.be',
  'img.youtube.com',
  'i.ytimg.com',
  'instagram.com',
  'www.instagram.com',
  'twitter.com',
  'x.com',
  'www.twitter.com',
  'pbs.twimg.com',
  'abs.twimg.com',
  'twitch.tv',
  'www.twitch.tv',
  'clips.twitch.tv',
  'spotify.com',
  'open.spotify.com',
  'soundcloud.com',
  'www.soundcloud.com',
  'vimeo.com',
  'www.vimeo.com',
  'imgur.com',
  'i.imgur.com',
  'giphy.com',
  'media.giphy.com',
  'tenor.com',
  'media.tenor.com',
  'gfycat.com',
  'reddit.com',
  'www.reddit.com',
  'v.redd.it',
  'i.redd.it',
  'preview.redd.it',
  'github.com',
  'gist.github.com',
  'raw.githubusercontent.com',
  'avatars.githubusercontent.com',
  'user-images.githubusercontent.com',
  'codepen.io',
  'codesandbox.io',
  'streamable.com',
  'medal.tv',
  'discord.com',
  'cdn.discordapp.com',
  'media.discordapp.net',
  'picsum.photos',
  'images.unsplash.com',
]);

// Upper bound on the whole upstream exchange (headers and body read).
const FETCH_TIMEOUT_MS = 8000;

// Meta tags live in <head>, so only a prefix of the document is read.
const MAX_HTML_BYTES = 50 * 1024;

// One complete <meta ...> tag. Like the simple patterns it replaces, it stops
// at the first '>' even when that character sits inside a quoted value.
const META_TAG_PATTERN = /<meta\b[^>]*>/gi;

// One name=value attribute with a double-quoted, single-quoted or bare value.
// The leading \s anchors each attempt to an attribute boundary.
const ATTRIBUTE_PATTERN = /\s([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g;

const TITLE_PATTERN = /<title[^>]*>([^<]+)<\/title>/i;
const HEAD_END_PATTERN = /<\/head\s*>/i;
const ENTITY_PATTERN = /&(?:#(\d+)|#x([0-9a-f]+)|([a-z]+));/gi;

const NAMED_ENTITIES = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
]);

/**
 * Check if a URL is from a trusted domain
 *
 * @param {string} url - Absolute URL to test.
 * @returns {boolean} True when the URL parses and its hostname is an exact
 *   member of TRUSTED_DOMAINS; false otherwise (including unparsable input).
 */
function isTrustedDomain(url) {
  try {
    const parsed = new URL(url);
    return TRUSTED_DOMAINS.has(parsed.hostname);
  } catch {
    return false;
  }
}

/**
 * Report whether a URL hostname is a literal loopback, private, link-local or
 * otherwise non-public address that this endpoint must not fetch.
 *
 * Only literal hosts are inspected; no DNS lookup is performed, so a public
 * name that resolves to an internal address is NOT detected here.
 *
 * @param {string} hostname - `URL#hostname` value (IPv6 literals arrive in brackets).
 * @returns {boolean} True when the host must be refused.
 */
function isBlockedHost(hostname) {
  const host = hostname.toLowerCase().replace(/\.$/, '');
  if (host === 'localhost' || host.endsWith('.localhost')) {
    return true;
  }

  if (host.startsWith('[')) {
    const v6 = host.slice(1, -1);
    return (
      v6 === '::' ||
      v6 === '::1' ||
      v6.startsWith('::ffff:') || // IPv4-mapped addresses
      /^f[cd][0-9a-f]{2}:/.test(v6) || // unique local fc00::/7
      /^fe[89ab][0-9a-f]:/.test(v6) // link-local fe80::/10
    );
  }

  const ipv4 = /^(\d+)\.(\d+)\.\d+\.\d+$/.exec(host);
  if (!ipv4) {
    return false;
  }
  const first = Number(ipv4[1]);
  const second = Number(ipv4[2]);
  return (
    first === 0 ||
    first === 10 ||
    first === 127 ||
    first >= 224 ||
    (first === 100 && second >= 64 && second <= 127) ||
    (first === 169 && second === 254) ||
    (first === 172 && second >= 16 && second <= 31) ||
    (first === 192 && second === 168)
  );
}

/**
 * Get a safe image URL - either direct (if trusted) or signed proxy URL
 *
 * @param {string|null|undefined} imageUrl - Absolute image URL.
 * @returns {Promise<string|null>} Null for empty input, the URL itself when
 *   its host is trusted, otherwise an `/api/image-proxy` URL carrying the
 *   value returned by `signImageUrl`.
 */
async function getSafeImageUrl(imageUrl) {
  if (!imageUrl) return null;

  if (isTrustedDomain(imageUrl)) {
    return imageUrl; // Trusted, return as-is
  }

  // Create signed proxy URL
  const signature = await signImageUrl(imageUrl);
  return `/api/image-proxy?url=${encodeURIComponent(imageUrl)}&sig=${signature}`;
}

/**
 * Decode HTML entities
 *
 * Decodes the named entities in NAMED_ENTITIES plus decimal and hexadecimal
 * numeric references in a single pass, so already-decoded output is never
 * decoded again (`&amp;lt;` becomes `&lt;`, not `<`). Unknown or out-of-range
 * entities are left untouched.
 *
 * @param {string|null|undefined} text - Raw attribute or element text.
 * @returns {string|null|undefined} Decoded text; falsy input is returned as-is.
 */
function decodeHTMLEntities(text) {
  if (!text) return text;

  return text.replace(ENTITY_PATTERN, (entity, decimal, hex, name) => {
    if (name !== undefined) {
      return NAMED_ENTITIES.get(name.toLowerCase()) ?? entity;
    }
    const codePoint = decimal !== undefined ? parseInt(decimal, 10) : parseInt(hex, 16);
    // fromCodePoint throws above U+10FFFF; keep such references verbatim.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  });
}

/**
 * Index the <meta> tags of an HTML fragment by their `property` / `name`.
 *
 * Keys are lower-cased, values are entity-decoded `content` attributes. The
 * first non-empty occurrence of a key wins. Attribute order inside the tag
 * and the quote style do not matter, and a quote of the other kind inside a
 * value is preserved.
 *
 * @param {string} html - HTML text to scan.
 * @returns {Map<string, string>} Meta identifier -> decoded content.
 */
function collectMetaTags(html) {
  const tags = new Map();

  for (const [tag] of html.matchAll(META_TAG_PATTERN)) {
    const attrs = new Map();
    for (const [, name, doubleQuoted, singleQuoted, bare] of tag.matchAll(ATTRIBUTE_PATTERN)) {
      const key = name.toLowerCase();
      if (!attrs.has(key)) {
        attrs.set(key, doubleQuoted ?? singleQuoted ?? bare);
      }
    }

    const content = attrs.get('content');
    if (!content) continue;

    for (const idAttribute of ['property', 'name']) {
      const id = attrs.get(idAttribute)?.trim().toLowerCase();
      if (id && !tags.has(id)) {
        tags.set(id, decodeHTMLEntities(content));
      }
    }
  }

  return tags;
}

/**
 * Parse Open Graph and meta tags from HTML
 *
 * Lookup order per field: Open Graph, then Twitter card, then the standard
 * tag (`<title>` / `description`). A missing site name falls back to the
 * page hostname without a leading `www.`.
 *
 * @param {string} html - HTML text (typically just the document head).
 * @param {string} url - URL the HTML was requested from; used as the base for
 *   relative image URLs and for the site-name fallback.
 * @returns {{url: string, title: ?string, description: ?string, image: ?string,
 *   siteName: ?string, type: ?string, video: ?string, themeColor: ?string}}
 */
function parseMetaTags(html, url) {
  const tags = collectMetaTags(html);
  const pick = (...keys) => {
    for (const key of keys) {
      const value = tags.get(key);
      if (value) return value;
    }
    return null;
  };

  const meta = {
    url,
    title: pick('og:title', 'twitter:title'),
    description: pick('og:description', 'twitter:description', 'description'),
    image: pick('og:image', 'twitter:image'),
    siteName: pick('og:site_name'),
    type: pick('og:type'),
    // NOTE(review): video is returned without a trust check or proxying;
    // confirm the client only embeds it for trusted hosts.
    video: pick('og:video', 'og:video:url'),
    themeColor: pick('theme-color'),
  };

  // Fallback to the <title> element
  if (!meta.title) {
    const titleMatch = html.match(TITLE_PATTERN);
    const title = titleMatch ? decodeHTMLEntities(titleMatch[1]).trim() : '';
    meta.title = title || null;
  }

  // Resolve the image against the page URL (not just its origin) so that
  // path-relative references work, and drop anything that is not http(s).
  if (meta.image) {
    try {
      const resolved = new URL(meta.image, url);
      meta.image = ['http:', 'https:'].includes(resolved.protocol) ? resolved.href : null;
    } catch {
      meta.image = null;
    }
  }

  // Get site name from domain if not found
  if (!meta.siteName) {
    try {
      meta.siteName = new URL(url).hostname.replace(/^www\./, '');
    } catch {
      // url is not absolute; leave siteName as null
    }
  }

  return meta;
}

/**
 * Read and decode the start of a response body.
 *
 * Stops at end of stream, once at least `maxBytes` bytes were consumed, or as
 * soon as the closing </head> tag has been seen, then cancels the rest of the
 * stream. A single streaming TextDecoder is used so multi-byte characters
 * split across chunks decode correctly. Bytes are decoded as UTF-8.
 *
 * @param {Response} response - Fetch response whose body has not been read.
 * @param {number} maxBytes - Soft byte limit (checked between chunks).
 * @returns {Promise<string>} Decoded prefix; empty string when there is no body.
 */
async function readHtmlPrefix(response, maxBytes) {
  if (!response.body) return '';

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let html = '';
  let bytesRead = 0;

  while (bytesRead < maxBytes) {
    const { done, value } = await reader.read();
    if (done) break;

    html += decoder.decode(value, { stream: true });
    bytesRead += value.length;

    // Stop early if we've passed </head>
    if (HEAD_END_PATTERN.test(html)) break;
  }

  await reader.cancel();
  return html;
}

/**
 * Build a JSON response.
 *
 * @param {unknown} body - Value to serialize.
 * @param {number} status - HTTP status code.
 * @param {Record<string, string>} [extraHeaders] - Headers added after Content-Type.
 * @returns {Response}
 */
function jsonResponse(body, status, extraHeaders = {}) {
  return new Response(JSON.stringify(body), {
    status,
    headers: { 'Content-Type': 'application/json', ...extraHeaders },
  });
}

/**
 * GET handler: return preview metadata for the URL in the `url` query parameter.
 *
 * Responses (all JSON):
 * - 200 preview object (`type: 'image'`, `type: 'video'`, or parsed page meta
 *   plus `trusted`), cacheable for one hour
 * - 400 missing/invalid/non-public URL, or target is not HTML
 * - 403 direct video from an untrusted host
 * - 429 rate limited
 * - 502 upstream answered with a non-2xx status
 * - 500 fetch failed or timed out
 *
 * A nonce cookie is attached to 429 and 200 responses when the request did
 * not already carry one.
 *
 * @param {{request: Request}} context
 * @returns {Promise<Response>}
 */
export async function GET({ request }) {
  // Rate limit check
  const rateCheck = checkRateLimit(request, {
    limit: 10,
    windowMs: 1000,
    burstLimit: 50,
    burstWindowMs: 10_000,
  });

  let cookieId = getCookieId(request);
  const hadCookie = !!cookieId;
  if (!cookieId) {
    cookieId = generateNonce();
  }

  const withCookie = (response) => {
    if (!hadCookie) {
      response.headers.set('Set-Cookie', createNonceCookie(cookieId));
    }
    return response;
  };
  const okResponse = (body) =>
    withCookie(jsonResponse(body, 200, { 'Cache-Control': 'public, max-age=3600' }));

  if (!rateCheck.allowed) {
    const errorMsg = rateCheck.isFlooding
      ? { error: 'Too many requests - please slow down' }
      : { error: 'Rate limit exceeded' };
    return withCookie(jsonResponse(errorMsg, 429, { 'Retry-After': '1' }));
  }

  recordRequest(request, 1000);

  const targetUrl = new URL(request.url).searchParams.get('url');
  if (!targetUrl) {
    return jsonResponse({ error: 'Missing url parameter' }, 400);
  }

  // Validate URL format
  let parsedUrl;
  try {
    parsedUrl = new URL(targetUrl);
  } catch {
    return jsonResponse({ error: 'Invalid URL' }, 400);
  }
  if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
    return jsonResponse({ error: 'Invalid URL' }, 400);
  }

  // Refuse literal loopback / private addresses: this endpoint fetches
  // caller-supplied URLs from inside the server's network.
  if (isBlockedHost(parsedUrl.hostname)) {
    return jsonResponse({ error: 'URL host is not allowed' }, 400);
  }

  // Check if it's a trusted domain (client can fetch directly)
  const trusted = isTrustedDomain(targetUrl);

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  try {
    const response = await fetch(targetUrl, {
      method: 'GET',
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; DiscordBot/2.0; +https://discordapp.com)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
      },
      signal: controller.signal,
      redirect: 'follow',
    });

    // Redirects are followed automatically, so re-check where we ended up
    // before exposing anything from the response. The request itself has
    // already been sent at this point.
    if (response.url && isBlockedHost(new URL(response.url).hostname)) {
      return jsonResponse({ error: 'URL host is not allowed' }, 400);
    }

    if (!response.ok) {
      return jsonResponse({ error: 'Failed to fetch URL', status: response.status }, 502);
    }

    const contentType = response.headers.get('content-type') || '';

    // Handle image URLs directly - return safe (possibly proxied) URL
    if (contentType.startsWith('image/')) {
      const safeImageUrl = await getSafeImageUrl(targetUrl);
      return okResponse({
        url: targetUrl,
        type: 'image',
        image: safeImageUrl,
        trusted,
      });
    }

    // Handle video URLs directly (no proxy for video - too large)
    if (contentType.startsWith('video/')) {
      // Only allow trusted video sources
      if (!trusted) {
        return jsonResponse({ error: 'Untrusted video source' }, 403);
      }
      return okResponse({
        url: targetUrl,
        type: 'video',
        video: targetUrl,
        trusted,
      });
    }

    // Only parse HTML
    if (!contentType.includes('text/html') && !contentType.includes('application/xhtml')) {
      return jsonResponse({ error: 'URL is not an HTML page', contentType }, 400);
    }

    // Read only the first 50KB to get meta tags (they're usually in <head>)
    const html = await readHtmlPrefix(response, MAX_HTML_BYTES);

    const meta = parseMetaTags(html, targetUrl);
    meta.trusted = trusted;

    // Convert image URL to safe URL (proxy if untrusted)
    if (meta.image) {
      meta.image = await getSafeImageUrl(meta.image);
    }

    return okResponse(meta);
  } catch (err) {
    console.error('[link-preview] Error fetching URL:', err.message);
    return jsonResponse({ error: 'Failed to fetch preview', message: err.message }, 500);
  } finally {
    // Runs on every path, so the timer never outlives the request and the
    // timeout also bounds the body read above.
    clearTimeout(timeout);
  }
}