First adaptation pass

2025-11-06 00:01:59 +01:00
parent 5b1fd847c2
commit 6b883f8126
112 changed files with 44142 additions and 953 deletions
@@ -0,0 +1,187 @@
+#!/usr/bin/env node
+
+import https from 'https';
+import http from 'http';
+import fs from 'fs';
+import path from 'path';
+import { URL } from 'url';
+
+// Origin of the wiki; page paths are appended to it to build request URLs
+const BASE_URL = 'https://corvanis.wiki';
+// Path of the first page to fetch; it seeds the crawl queue below
+const START_PAGE = '/Prism+(Testing)/Prism';
+// Directory that receives the saved pages and the page list (relative to the working directory)
+const OUTPUT_DIR = './wiki-downloads';
+
+// Known Prism pages structure - will be expanded as we find more.
+// NOTE(review): nothing in the visible script reads this list — confirm
+// whether it was meant to seed the crawl queue.
+const KNOWN_SECTIONS = [
+  '',
+  '/Rules',
+  '/Character+Creation',
+  '/Combat',
+  '/Equipment',
+  '/Magic',
+  '/Spells',
+  '/Miracles',
+  '/Monsters',
+  '/Bestiary',
+];
+
+// Paths already handed to the downloader, so each page is requested at most once
+const visitedPages = new Set();
+// Queue of paths still to fetch (pushed at the end, shifted from the front)
+const pagesToVisit = [START_PAGE];
+
+// Create the output directory at module load if it does not exist yet
+if (!fs.existsSync(OUTPUT_DIR)) {
+  fs.mkdirSync(OUTPUT_DIR, { recursive: true });
+}
+
+/**
+ * Perform a single HTTP(S) GET request and buffer the whole response body.
+ *
+ * The transport module (`https` or `http`) is picked from the URL's protocol.
+ * Redirects are not followed: any response, whatever its status, resolves
+ * with its own status code and body.
+ *
+ * @param {string} url The absolute URL to request
+ * @returns {Promise<{data: string, statusCode: number}>} The body decoded as
+ *   UTF-8 and the HTTP status code. The promise rejects when the URL is
+ *   invalid, the request fails, or the response stream errors.
+ */
+function makeRequest(url) {
+  return new Promise((resolve, reject) => {
+    const urlObj = new URL(url);
+    const protocol = urlObj.protocol === 'https:' ? https : http;
+
+    protocol.get(url, (res) => {
+      // Decode through the stream's own UTF-8 decoder. Appending raw Buffer
+      // chunks to a string decodes each chunk separately, which corrupts any
+      // multi-byte character that straddles a chunk boundary.
+      res.setEncoding('utf8');
+
+      let data = '';
+
+      res.on('data', (chunk) => {
+        data += chunk;
+      });
+
+      res.on('end', () => {
+        resolve({ data, statusCode: res.statusCode });
+      });
+
+      // Without this, a failure while the body is streaming would leave the
+      // promise pending forever.
+      res.on('error', reject);
+    }).on('error', reject);
+  });
+}
+
+/**
+ * Collect the distinct page paths under `Prism+(Testing)/Prism` that an HTML
+ * document links to.
+ *
+ * Fragments and query strings are dropped. Absolute `http…` URLs are reduced
+ * to their pathname; hrefs that are neither root-relative nor `http…` are
+ * ignored.
+ *
+ * @param {string} html The HTML content to scan
+ * @param {string} currentPath The current page path (not used by the extraction)
+ * @returns {string[]} Unique paths, in order of first appearance
+ */
+function extractLinks(html, currentPath) {
+  const marker = 'Prism+(Testing)/Prism';
+  const hrefPattern = /href=["']([^"']*Prism\+\(Testing\)\/Prism[^"']*)["']/gi;
+
+  // Reduce an href to a root-relative path, or null when it cannot be used
+  const toPath = (href) => {
+    const bare = href.split('#')[0].split('?')[0];
+    if (bare.startsWith('/')) {
+      return bare;
+    }
+    if (!bare.startsWith('http')) {
+      return null;
+    }
+    try {
+      return new URL(bare).pathname;
+    } catch (e) {
+      return null;
+    }
+  };
+
+  const found = new Set();
+  for (const [, href] of html.matchAll(hrefPattern)) {
+    const candidate = toPath(href);
+    // Re-check the marker: stripping the query or fragment may have removed it
+    if (candidate !== null && candidate.includes(marker)) {
+      found.add(candidate);
+    }
+  }
+
+  return [...found];
+}
+
+/**
+ * Turn an arbitrary string (typically a page path) into a filename-safe token.
+ *
+ * Characters other than ASCII letters, digits, `_`, `-`, `+`, `(` and `)`
+ * become `_`, runs of `_` collapse to one, and a single leading or trailing
+ * `_` is trimmed.
+ *
+ * @param {string} str The string to sanitize
+ * @returns {string} The sanitized name (may be empty)
+ */
+function sanitizeFilename(str) {
+  // Ordered rewrite passes: [pattern, replacement]
+  const passes = [
+    [/[^a-z0-9_\-+()]/gi, '_'],
+    [/_+/g, '_'],
+    [/^_|_$/g, ''],
+  ];
+
+  return passes.reduce(
+    (text, [pattern, replacement]) => text.replace(pattern, replacement),
+    str,
+  );
+}
+
+/**
+ * Download one wiki page, save it to the output directory and queue the
+ * pages it links to.
+ *
+ * The path is recorded in `visitedPages` before the request is made, so a
+ * page is attempted at most once even if the attempt fails. Failures (non-200
+ * status, network or filesystem errors) are logged and never thrown.
+ *
+ * @param {string} pagePath The path of the page to download, appended to BASE_URL
+ * @returns {Promise<void>} Resolves once the page is handled and the politeness delay has elapsed
+ */
+async function downloadPage(pagePath) {
+  if (visitedPages.has(pagePath)) {
+    return;
+  }
+
+  visitedPages.add(pagePath);
+  console.log(`Téléchargement: ${pagePath}`);
+
+  try {
+    const url = BASE_URL + pagePath;
+    const { data, statusCode } = await makeRequest(url);
+
+    if (statusCode !== 200) {
+      console.error(`  ❌ Erreur ${statusCode} pour ${pagePath}`);
+      return;
+    }
+
+    // Save the HTML file
+    const filename = `${sanitizeFilename(pagePath)}.html`;
+    const filepath = path.join(OUTPUT_DIR, filename);
+    fs.writeFileSync(filepath, data, 'utf-8');
+    console.log(`  ✓ Sauvegardé: ${filename}`);
+
+    // Queue newly discovered links, skipping ones already fetched or already
+    // waiting so the queue does not fill up with duplicates
+    for (const link of extractLinks(data, pagePath)) {
+      if (!visitedPages.has(link) && !pagesToVisit.includes(link)) {
+        pagesToVisit.push(link);
+      }
+    }
+  } catch (error) {
+    console.error(`  ❌ Erreur lors du téléchargement de ${pagePath}:`, error.message);
+  } finally {
+    // Pause after every attempt, including failed ones, so a run of errors
+    // does not turn into back-to-back requests against the server
+    await new Promise((resolve) => {
+      setTimeout(resolve, 500);
+    });
+  }
+}
+
+/**
+ * Entry point: print the run parameters, drain the crawl queue one page at a
+ * time, then print a summary and write the sorted list of visited paths to
+ * `_pages-list.txt` in the output directory.
+ *
+ * @returns {Promise<void>}
+ */
+async function main() {
+  const banner = [
+    '🚀 Début du téléchargement des pages wiki...',
+    `   Base URL: ${BASE_URL}`,
+    `   Page de départ: ${START_PAGE}`,
+    `   Répertoire de sortie: ${OUTPUT_DIR}\n`,
+  ];
+  for (const line of banner) {
+    console.log(line);
+  }
+
+  // The queue grows while it is drained: each download may push new paths
+  while (pagesToVisit.length !== 0) {
+    await downloadPage(pagesToVisit.shift());
+  }
+
+  console.log(`\n✅ Téléchargement terminé!`);
+  console.log(`   Pages téléchargées: ${visitedPages.size}`);
+  console.log(`   Fichiers sauvegardés dans: ${OUTPUT_DIR}`);
+
+  // Save a list of every path that was attempted, sorted
+  const sortedPaths = [...visitedPages].sort();
+  const listPath = path.join(OUTPUT_DIR, '_pages-list.txt');
+  fs.writeFileSync(listPath, sortedPaths.join('\n'));
+  console.log(`   Liste des pages: ${listPath}`);
+}
+
+main().catch((err) => {
+  console.error(err);
+});