#!/usr/bin/env node
import puppeteer from 'puppeteer';
import TurndownService from 'turndown';
import fs from 'fs';
import path from 'path';

const BASE_URL = 'https://corvanis.wiki';
const PAGES_LIST_FILE = './tools/wikiPagesList.txt';
const OUTPUT_FILE = './prism-rules/prism-wiki-complete.md';

// Path of the wiki root page; it gets a higher-level heading than the others
// and is the prefix stripped from page paths to build titles.
const ROOT_PAGE_PATH = '/Prism+(Testing)/Prism';

// Create the output directory. With `recursive: true`, mkdirSync does not
// throw when the directory already exists, so no prior existence check is needed.
fs.mkdirSync(path.dirname(OUTPUT_FILE), { recursive: true });

// Initialize Turndown for HTML to Markdown conversion
const turndownService = new TurndownService({
  headingStyle: 'atx',
  codeBlockStyle: 'fenced',
});

/**
 * Wait for a specific time.
 * @param {number} ms Milliseconds to wait
 * @returns {Promise<void>} Resolves once the delay has elapsed
 */
function wait(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Derive a heading title from a wiki page path.
 *
 * Strips the leading root-page prefix, turns every `+` into a space and trims
 * the result. An empty result (the root page itself) becomes 'Prism Home'.
 * @param {string} pagePath Page path
 * @returns {string} Clean title
 */
function getPageTitle(pagePath) {
  const title = pagePath
    .replace(/^\/Prism\+\(Testing\)\/Prism\/?/, '')
    .replace(/\+/g, ' ')
    .trim();
  return title || 'Prism Home';
}

/**
 * Scrape a page and convert it to Markdown.
 *
 * Any error (navigation failure, timeout, missing container) is logged and
 * turned into an empty string so that one bad page does not abort the run.
 * @param {object} page Puppeteer page object
 * @param {string} pagePath Page path to scrape (appended to BASE_URL)
 * @returns {Promise<string>} Markdown content, or '' when nothing was scraped
 */
async function scrapePage(page, pagePath) {
  const url = `${BASE_URL}${pagePath}`;
  console.log(`📄 Scraping: ${pagePath}`);

  try {
    // Navigate to the page
    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout: 30000,
    });

    // Wait for the main content to load
    await page.waitForSelector('.published-container', { timeout: 10000 });

    // Wait a bit more for dynamic content
    await wait(2000);

    // Extract the main content. This callback runs inside the browser page,
    // so it must not reference any variable from this module's scope.
    const content = await page.evaluate(() => {
      const container = document.querySelector('.published-container');
      if (!container) {
        return null;
      }

      // Remove navigation elements
      const nav = container.querySelector('.site-body-left-column');
      if (nav) {
        nav.remove();
      }

      // Get the main content area, falling back to the whole container
      const mainContent = container.querySelector('.site-body-center-column');
      return mainContent ? mainContent.innerHTML : container.innerHTML;
    });

    if (!content) {
      console.log(` ⚠ Pas de contenu trouvé pour ${pagePath}`);
      return '';
    }

    // Convert HTML to Markdown
    const markdown = turndownService.turndown(content);
    console.log(` ✓ Converti (${markdown.length} caractères)`);
    return markdown;
  } catch (error) {
    console.error(` ❌ Erreur: ${error.message}`);
    return '';
  }
}

/**
 * Main function: reads the page list, scrapes every listed page sequentially
 * and writes the combined Markdown to OUTPUT_FILE.
 *
 * Exits the process with code 1 when the page list file does not exist.
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Début du scraping du wiki Prism...\n');

  // Read the list of pages
  if (!fs.existsSync(PAGES_LIST_FILE)) {
    console.error(`❌ Fichier de liste introuvable: ${PAGES_LIST_FILE}`);
    process.exit(1);
  }

  // One page path per line; blank lines and lines starting with '#' are skipped
  const pages = fs
    .readFileSync(PAGES_LIST_FILE, 'utf-8')
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line && !line.startsWith('#'));

  console.log(`📋 ${pages.length} page(s) à scraper\n`);

  // Launch browser
  console.log('🌐 Lancement du navigateur...\n');
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  let allMarkdown = '# Prism RPG - Wiki Complete\n\n';
  allMarkdown += `*Scraped from ${BASE_URL} on ${new Date().toLocaleDateString('fr-FR')}*\n\n`;
  allMarkdown += '---\n\n';

  // Number of pages that actually produced Markdown
  let scrapedCount = 0;

  // try/finally guarantees the browser process is closed even when page
  // creation or viewport setup throws.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1280, height: 800 });

    // Scrape all pages, one at a time, reusing the same browser page
    for (const pagePath of pages) {
      const title = getPageTitle(pagePath);
      const level = pagePath === ROOT_PAGE_PATH ? '##' : '###';
      allMarkdown += `${level} ${title}\n\n`;

      const markdown = await scrapePage(page, pagePath);
      if (markdown) {
        allMarkdown += `${markdown}\n\n`;
        allMarkdown += '---\n\n';
        scrapedCount += 1;
      }

      // Wait between requests
      await wait(1000);
    }
  } finally {
    // Close browser
    await browser.close();
  }

  // Save the combined Markdown file
  fs.writeFileSync(OUTPUT_FILE, allMarkdown, 'utf-8');

  // Report the size in bytes as written to disk; String#length would count
  // UTF-16 code units and under-report non-ASCII content.
  const sizeInKb = Buffer.byteLength(allMarkdown, 'utf-8') / 1024;

  console.log(`\n✅ Scraping terminé!`);
  console.log(` Pages scrapées: ${scrapedCount}/${pages.length}`);
  console.log(` Fichier généré: ${OUTPUT_FILE}`);
  console.log(` Taille: ${sizeInKb.toFixed(2)} KB`);
}

main().catch((error) => {
  console.error('❌ Erreur fatale:', error);
  process.exit(1);
});