#!/usr/bin/env node
|
|
|
|
import https from 'https';
|
|
import http from 'http';
|
|
import fs from 'fs';
|
|
import path from 'path';
|
|
import { URL } from 'url';
|
|
|
|
// Origin of the wiki; each page path from the list file is appended to it.
const BASE_URL = 'https://corvanis.wiki';
// Text file with one page path per line (blank lines and '#' lines are skipped).
const PAGES_LIST_FILE = './tools/wikiPagesList.txt';
// Directory that receives the downloaded HTML files.
const OUTPUT_DIR = './wiki-downloads';

// Ensure the output directory exists. With `recursive: true`, mkdirSync
// creates missing parents and succeeds when the directory is already there,
// so no separate existence check (and its check-then-act race) is needed.
// If the path exists but is NOT a directory this now fails immediately
// instead of failing later on the first file write.
fs.mkdirSync(OUTPUT_DIR, { recursive: true });
|
|
|
|
/**
 * Perform a GET request over HTTP or HTTPS and buffer the whole response
 * body as text.
 *
 * The transport module is chosen from the URL's protocol. The promise
 * resolves for every status code the server returns; the caller decides
 * what counts as success. No redirect handling is done here.
 *
 * @param {string} url The absolute URL to request
 * @returns {Promise<{data: string, statusCode: number}>} The response body
 *   decoded as UTF-8 and the HTTP status code
 * @throws {Error} Rejects on an invalid URL, a request error, or a
 *   response stream error
 */
function makeRequest(url) {
  return new Promise((resolve, reject) => {
    // A malformed URL throws here, which rejects the promise.
    const urlObj = new URL(url);
    const protocol = urlObj.protocol === 'https:' ? https : http;

    protocol
      .get(url, (res) => {
        // Let the stream decode UTF-8 itself. Concatenating raw Buffer
        // chunks onto a string decodes each chunk in isolation, which turns
        // a multi-byte character split across two chunks into U+FFFD.
        res.setEncoding('utf8');

        let data = '';

        res.on('data', (chunk) => {
          data += chunk;
        });

        res.on('end', () => {
          resolve({ data, statusCode: res.statusCode });
        });

        // A failure while reading the body would otherwise leave the
        // promise pending forever.
        res.on('error', reject);
      })
      .on('error', reject);
  });
}
|
|
|
|
/**
 * Turn a page path into a string that is safe to use as a file name.
 *
 * One leading slash is dropped, every character outside
 * `a-z A-Z 0-9 _ - + ( )` becomes an underscore, runs of underscores are
 * collapsed to one, and a leading/trailing underscore is trimmed.
 *
 * @param {string} str The string to sanitize
 * @returns {string} Sanitized, never-empty file name (without extension);
 *   `'index'` when nothing usable remains (e.g. for the root path `/`)
 */
function sanitizeFilename(str) {
  const sanitized = str
    .replace(/^\//, '') // Remove leading slash
    .replace(/[^a-z0-9_\-+()]/gi, '_')
    .replace(/_+/g, '_')
    .replace(/^_|_$/g, '');

  // An empty result would make the caller write a hidden ".html" file.
  return sanitized || 'index';
}
|
|
|
|
/**
 * Download one wiki page and save it as an HTML file in OUTPUT_DIR.
 *
 * The file name is derived from the page path via sanitizeFilename. Only a
 * 200 response is saved; any other status code, a request error, or a
 * write error is logged and reported as failure. The function never
 * rejects.
 *
 * @param {string} pagePath The path of the page to download, relative to
 *   BASE_URL (a missing leading slash is tolerated)
 * @returns {Promise<boolean>} Success status
 */
async function downloadPage(pagePath) {
  console.log(`Téléchargement: ${pagePath}`);

  let isSaved = false;

  try {
    // Without the separator, "wiki/Page" would be glued onto the host name.
    const normalizedPath = pagePath.startsWith('/') ? pagePath : `/${pagePath}`;
    const url = `${BASE_URL}${normalizedPath}`;
    const { data, statusCode } = await makeRequest(url);

    if (statusCode === 200) {
      // Save the HTML file
      const filename = `${sanitizeFilename(pagePath)}.html`;
      const filepath = path.join(OUTPUT_DIR, filename);
      fs.writeFileSync(filepath, data, 'utf-8');
      console.log(` ✓ Sauvegardé: ${filename}`);
      isSaved = true;
    } else {
      console.error(` ❌ Erreur ${statusCode} pour ${pagePath}`);
    }
  } catch (error) {
    console.error(` ❌ Erreur lors du téléchargement de ${pagePath}:`, error.message);
  }

  // Pause after every attempt, failed ones included, so that a run of
  // failing pages does not hit the server back-to-back.
  await new Promise((resolve) => {
    setTimeout(resolve, 500);
  });

  return isSaved;
}
|
|
|
|
/**
 * Script entry point.
 *
 * Prints the run configuration, loads the page list from PAGES_LIST_FILE
 * (one path per line; blank lines and lines starting with '#' are ignored),
 * downloads the pages one after another, and prints a summary. Exits the
 * process with code 1 when the list file does not exist.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('🚀 Début du téléchargement des pages wiki...');
  console.log(` Base URL: ${BASE_URL}`);
  console.log(` Liste des pages: ${PAGES_LIST_FILE}`);
  console.log(` Répertoire de sortie: ${OUTPUT_DIR}\n`);

  // Nothing can be done without the page list, so stop right away.
  if (!fs.existsSync(PAGES_LIST_FILE)) {
    console.error(`❌ Fichier de liste introuvable: ${PAGES_LIST_FILE}`);
    process.exit(1);
  }

  // Keep only meaningful entries: trimmed, non-empty, not a '#' comment.
  const pagePaths = [];
  const rawLines = fs.readFileSync(PAGES_LIST_FILE, 'utf-8').split('\n');
  for (const rawLine of rawLines) {
    const entry = rawLine.trim();
    if (entry !== '' && !entry.startsWith('#')) {
      pagePaths.push(entry);
    }
  }

  console.log(`📄 ${pagePaths.length} page(s) à télécharger\n`);

  // Pages are fetched strictly one at a time.
  let savedCount = 0;
  for (const pagePath of pagePaths) {
    if (await downloadPage(pagePath)) {
      savedCount += 1;
    }
  }

  console.log(`\n✅ Téléchargement terminé!`);
  console.log(` Pages réussies: ${savedCount}/${pagePaths.length}`);
  console.log(` Fichiers sauvegardés dans: ${OUTPUT_DIR}`);
}
|
|
|
|
// Start the script. An unexpected failure is logged and reflected in the
// exit status so shells and CI do not mistake a crashed run for success.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|