forked from public/fvtt-cthulhu-eternal
		
	
		
			
				
	
	
		
			271 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			271 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| var should  = require('should'),
 | |
|     needle  = require('./../'),
 | |
|     decoder = require('./../lib/decoder'),
 | |
|     Q       = require('q'),
 | |
|     chardet = require('jschardet'),
 | |
|     fs      = require('fs'),
 | |
|     http    = require('http'),
 | |
|     helpers = require('./helpers');
 | |
| 
 | |
| describe('character encoding', function() {
 | |
| 
 | |
|   this.timeout(5000);
 | |
| 
 | |
|   function staticServerFor(file, content_type) {
 | |
|     return http.createServer(function(req, res) {
 | |
|       req.on('data', function(chunk) {})
 | |
|       req.on('end', function() {
 | |
|         // We used to pull from a particular site that is no longer up.
 | |
|         // This is a local mirror pulled from archive.org
 | |
|         // https://web.archive.org/web/20181003202907/http://www.nina.jp/server/slackware/webapp/tomcat_charset.html
 | |
|         fs.readFile(file, function(err, data) {
 | |
|           if (err) {
 | |
|             res.writeHead(404);
 | |
|             res.end(JSON.stringify(err));
 | |
|             return;
 | |
|           }
 | |
|           res.writeHeader(200, { 'Content-Type': content_type })
 | |
|           res.end(data);
 | |
|         });
 | |
|       })
 | |
|     })
 | |
|   }
 | |
| 
 | |
|   describe('Given content-type: "text/html; charset=EUC-JP"', function() {
 | |
|     var server, port = 2233;
 | |
| 
 | |
|     before(function(done) {
 | |
|       server = staticServerFor('test/files/tomcat_charset.html', 'text/html; charset=EUC-JP')
 | |
|       server.listen(port, done)
 | |
|       url = 'http://localhost:' + port;
 | |
|     })
 | |
| 
 | |
|     after(function(done) {
 | |
|       server.close(done)
 | |
|     })
 | |
| 
 | |
|     describe('with decode = false', function() {
 | |
|       it('does not decode', function(done) {
 | |
|         needle.get(url, { decode: false }, function(err, resp) {
 | |
|           resp.body.should.be.a.String;
 | |
|           chardet.detect(resp.body).encoding.should.eql('windows-1252');
 | |
|           resp.body.indexOf('EUCを使う').should.eql(-1);
 | |
|           done();
 | |
|         })
 | |
|       })
 | |
|     })
 | |
| 
 | |
|     describe('with decode = true', function() {
 | |
|       it('decodes', function(done) {
 | |
|         needle.get(url, { decode: true }, function(err, resp) {
 | |
|           resp.body.should.be.a.String;
 | |
|           chardet.detect(resp.body).encoding.should.eql('ascii');
 | |
|           resp.body.indexOf('EUCを使う').should.not.eql(-1);
 | |
|           done();
 | |
|         })
 | |
|       })
 | |
|     })
 | |
|   })
 | |
| 
 | |
|   describe('Given content-type: "text/html but file is charset: gb2312', function() {
 | |
| 
 | |
|     it('encodes to UTF-8', function(done) {
 | |
| 
 | |
|       // Our Needle wrapper that requests a chinese website.
 | |
|       var task    = Q.nbind(needle.get, needle, 'http://www.chinesetop100.com/');
 | |
| 
 | |
|       // Different instantiations of this task
 | |
|       var tasks   = [Q.fcall(task, {decode: true}),
 | |
|                      Q.fcall(task, {decode: false})];
 | |
| 
 | |
|       var results = tasks.map(function(task) {
 | |
|         return task.then(function(obj) {
 | |
|           return obj[0].body;
 | |
|         });
 | |
|       });
 | |
| 
 | |
|       // Execute all requests concurrently
 | |
|       Q.all(results).done(function(bodies) {
 | |
| 
 | |
|         var charsets = [
 | |
|           chardet.detect(bodies[0]).encoding,
 | |
|           chardet.detect(bodies[1]).encoding,
 | |
|         ]
 | |
| 
 | |
|         // We wanted to decode our first stream as specified by options
 | |
|         charsets[0].should.equal('ascii');
 | |
|         bodies[0].indexOf('全球中文网站前二十强').should.not.equal(-1);
 | |
| 
 | |
|         // But not our second stream
 | |
|         charsets[1].should.equal('windows-1252');
 | |
|         bodies[1].indexOf('全球中文网站前二十强').should.equal(-1);
 | |
| 
 | |
|         done();
 | |
|       });
 | |
|     })
 | |
|   })
 | |
| 
 | |
|   describe('Given content-type: text/html; charset=maccentraleurope', function() {
 | |
|     var server, port = 2233;
 | |
| 
 | |
|     // from 'https://wayback.archive-it.org/3259/20160921140616/https://www.arc.gov/research/MapsofAppalachia.asp?MAP_ID=11';
 | |
|     before(function(done) {
 | |
|       server = staticServerFor('test/files/Appalachia.html', 'text/html; charset=maccentraleurope')
 | |
|       server.listen(port, done)
 | |
|       url = 'http://localhost:' + port;
 | |
|     })
 | |
| 
 | |
|     after(function(done) {
 | |
|       server.close(done)
 | |
|     })
 | |
| 
 | |
|     describe('with decode = false', function() {
 | |
|       it('does not decode', function(done) {
 | |
|         needle.get(url, { decode: false }, function(err, resp) {
 | |
|           resp.body.should.be.a.String;
 | |
|           chardet.detect(resp.body).encoding.should.eql('ascii');
 | |
|           done();
 | |
|         })
 | |
|       })
 | |
|     })
 | |
| 
 | |
|     describe('with decode = true', function() {
 | |
|       it('does not explode', function(done) {
 | |
|         (function() {
 | |
|           needle.get(url, { decode: true }, function(err, resp) {
 | |
|             resp.body.should.be.a.String;
 | |
|             chardet.detect(resp.body).encoding.should.eql('ascii');
 | |
|             done();
 | |
|           })
 | |
|         }).should.not.throw();
 | |
|       })
 | |
|     })
 | |
|   })
 | |
| 
 | |
|   describe('Given content-type: "text/html"', function () {
 | |
| 
 | |
|     var server,
 | |
|         port = 54321,
 | |
|         text = 'Magyarországi Fióktelepe'
 | |
| 
 | |
|     before(function(done) {
 | |
|       server = helpers.server({
 | |
|         port: port,
 | |
|         response: text,
 | |
|         headers: { 'Content-Type': 'text/html' }
 | |
|       }, done);
 | |
|     })
 | |
| 
 | |
|     after(function(done) {
 | |
|       server.close(done)
 | |
|     })
 | |
| 
 | |
|     describe('with decode = false', function () {
 | |
|       it('decodes by default to utf-8', function (done) {
 | |
| 
 | |
|         needle.get('http://localhost:' + port, { decode: false }, function (err, resp) {
 | |
|           resp.body.should.be.a.String;
 | |
|           chardet.detect(resp.body).encoding.should.eql('ISO-8859-2');
 | |
|           resp.body.should.eql('Magyarországi Fióktelepe')
 | |
|           done();
 | |
|         })
 | |
| 
 | |
|       })
 | |
| 
 | |
|     })
 | |
|   })
 | |
|   
 | |
|   describe('multibyte characters split across chunks', function () {
 | |
| 
 | |
|     describe('with encoding = utf-8', function() {
 | |
|     
 | |
|       var d, 
 | |
|         result = Buffer.allocUnsafe(0);
 | |
| 
 | |
|       before(function(done) {
 | |
|         d = decoder('utf-8');
 | |
|         done();
 | |
|       });
 | |
| 
 | |
|       it('reassembles split multibyte characters', function (done) {
 | |
| 
 | |
|         d.on("data", function(chunk){
 | |
|           result = Buffer.concat([ result, chunk ]);
 | |
|         });
 | |
| 
 | |
|         d.on("end", function(){
 | |
|           result.toString("utf-8").should.eql('慶');
 | |
|           done();
 | |
|         });
 | |
| 
 | |
|         // write '慶' in utf-8 split across chunks
 | |
|         d.write(Buffer.from([0xE6]));
 | |
|         d.write(Buffer.from([0x85]));
 | |
|         d.write(Buffer.from([0xB6]));
 | |
|         d.end();
 | |
| 
 | |
|       })
 | |
|     })
 | |
|     
 | |
|     describe('with encoding = euc-jp', function() {
 | |
|     
 | |
|       var d, 
 | |
|         result = Buffer.allocUnsafe(0);
 | |
| 
 | |
|       before(function(done) {
 | |
|         d = decoder('euc-jp');
 | |
|         done();
 | |
|       });
 | |
| 
 | |
|       it('reassembles split multibyte characters', function (done) {
 | |
| 
 | |
|         d.on("data", function(chunk){
 | |
|           result = Buffer.concat([ result, chunk ]);
 | |
|         });
 | |
| 
 | |
|         d.on("end", function(){
 | |
|           result.toString("utf-8").should.eql('慶');
 | |
|           done();
 | |
|         });
 | |
| 
 | |
|         // write '慶' in euc-jp split across chunks
 | |
|         d.write(Buffer.from([0xB7]));
 | |
|         d.write(Buffer.from([0xC4]));
 | |
|         d.end();
 | |
| 
 | |
|       })
 | |
|     })
 | |
|     
 | |
|     describe('with encoding = gb18030', function() {
 | |
|     
 | |
|       var d, 
 | |
|         result = Buffer.allocUnsafe(0);
 | |
| 
 | |
|       before(function(done) {
 | |
|         d = decoder('gb18030');
 | |
|         done();
 | |
|       });
 | |
| 
 | |
|       it('reassembles split multibyte characters', function (done) {
 | |
| 
 | |
|         d.on("data", function(chunk){
 | |
|           result = Buffer.concat([ result, chunk ]);
 | |
|         });
 | |
| 
 | |
|         d.on("end", function(){
 | |
|           result.toString("utf-8").should.eql('慶');
 | |
|           done();
 | |
|         });
 | |
| 
 | |
|         // write '慶' in gb18030 split across chunks
 | |
|         d.write(Buffer.from([0x91]));
 | |
|         d.write(Buffer.from([0x63]));
 | |
|         d.end();
 | |
| 
 | |
|       })
 | |
|     })
 | |
| 
 | |
|   })
 | |
|   
 | |
| })
 |