/**
 * Character-encoding specs.
 *
 * Covers two things:
 *   - needle.get() with the `decode` option turned on and off, against
 *     responses with and without a charset in the Content-Type header;
 *   - the stream returned by lib/decoder when a multibyte character is
 *     written one byte per chunk.
 */
var should  = require('should'),
    needle  = require('./../'),
    decoder = require('./../lib/decoder'),
    Q       = require('q'),
    chardet = require('jschardet'),
    fs      = require('fs'),
    http    = require('http'),
    helpers = require('./helpers');

describe('character encoding', function() {

  this.timeout(5000);

  describe('Given content-type: "text/html; charset=EUC-JP"', function() {

    var port = 2233;
    var server;
    // Declared here so the assignment in before() does not create an
    // implicit global shared with other spec files.
    var url;

    /**
     * Builds an HTTP server that answers every request with the local
     * EUC-JP fixture, or with a 404 carrying the serialized read error.
     */
    function createServer() {
      return http.createServer(function(req, res) {

        // A 'data' listener is attached so the request body is consumed
        // and 'end' fires.
        req.on('data', function(chunk) {});

        req.on('end', function() {
          // We used to pull from a particular site that is no longer up.
          // This is a local mirror pulled from archive.org
          // https://web.archive.org/web/20181003202907/http://www.nina.jp/server/slackware/webapp/tomcat_charset.html
          fs.readFile('test/tomcat_charset.html', function(err, data) {
            if (err) {
              res.writeHead(404);
              res.end(JSON.stringify(err));
              return;
            }

            // writeHead() is the documented API; writeHeader() is a
            // deprecated alias.
            res.writeHead(200, { 'Content-Type': 'text/html; charset=EUC-JP' });
            res.end(data);
          });
        });
      });
    }

    before(function(done) {
      url = 'http://localhost:' + port;
      server = createServer();
      server.listen(port, done);
    });

    after(function(done) {
      server.close(done);
    });

    describe('with decode = false', function() {

      it('does not decode', function(done) {
        needle.get(url, { decode: false }, function(err, resp) {
          // Report request failures instead of crashing on an undefined resp.
          if (err) return done(err);

          resp.body.should.be.a.String;
          chardet.detect(resp.body).encoding.should.eql('windows-1252');
          resp.body.indexOf('EUCを使う').should.eql(-1);
          done();
        });
      });

    });

    describe('with decode = true', function() {

      it('decodes', function(done) {
        needle.get(url, { decode: true }, function(err, resp) {
          // Report request failures instead of crashing on an undefined resp.
          if (err) return done(err);

          resp.body.should.be.a.String;
          chardet.detect(resp.body).encoding.should.eql('ascii');
          resp.body.indexOf('EUCを使う').should.not.eql(-1);
          done();
        });
      });

    });

  });

  describe('Given content-type: "text/html but file is charset: gb2312', function() {

    // NOTE(review): this spec requests an external site, so it needs network
    // access and depends on that site's current content.
    it('encodes to UTF-8', function(done) {

      // Our Needle wrapper that requests a chinese website.
      var task    = Q.nbind(needle.get, needle, 'http://www.chinesetop100.com/');

      // Different instantiations of this task
      var tasks   = [Q.fcall(task, {decode: true}),
                     Q.fcall(task, {decode: false})];

      var results = tasks.map(function(task) {
        return task.then(function(obj) {
          return obj[0].body;
        });
      });

      // Execute all requests concurrently
      Q.all(results).done(function(bodies) {

        var charsets = [
          chardet.detect(bodies[0]).encoding,
          chardet.detect(bodies[1]).encoding,
        ];

        // We wanted to decode our first stream as specified by options
        charsets[0].should.equal('ascii');
        bodies[0].indexOf('全球中文网站前二十强').should.not.equal(-1);

        // But not our second stream
        charsets[1].should.equal('windows-1252');
        bodies[1].indexOf('全球中文网站前二十强').should.equal(-1);

        done();
      }, done); // a rejected request fails the spec through done(err)
    });

  });

  describe('Given content-type: "text/html"', function () {

    var server,
        port = 54321,
        text = 'Magyarországi Fióktelepe';

    before(function(done) {
      server = helpers.server({
        port: port,
        response: text,
        headers: { 'Content-Type': 'text/html' }
      }, done);
    });

    after(function(done) {
      server.close(done);
    });

    describe('with decode = false', function () {

      it('decodes by default to utf-8', function (done) {
        needle.get('http://localhost:' + port, { decode: false }, function (err, resp) {
          // Report request failures instead of crashing on an undefined resp.
          if (err) return done(err);

          resp.body.should.be.a.String;
          chardet.detect(resp.body).encoding.should.eql('ISO-8859-2');
          resp.body.should.eql('Magyarországi Fióktelepe');
          done();
        });
      });

    });

  });

  describe('multibyte characters split across chunks', function () {

    describe('with encoding = utf-8', function() {

      var d,
          result = Buffer.allocUnsafe(0);

      before(function(done) {
        d = decoder('utf-8');
        done();
      });

      it('reassembles split multibyte characters', function (done) {

        // Accumulate everything the decoder emits.
        d.on("data", function(chunk){
          result = Buffer.concat([ result, chunk ]);
        });

        d.on("end", function(){
          result.toString("utf-8").should.eql('慶');
          done();
        });

        // write '慶' in utf-8 split across chunks
        d.write(Buffer.from([0xE6]));
        d.write(Buffer.from([0x85]));
        d.write(Buffer.from([0xB6]));
        d.end();
      });

    });

    describe('with encoding = euc-jp', function() {

      var d,
          result = Buffer.allocUnsafe(0);

      before(function(done) {
        d = decoder('euc-jp');
        done();
      });

      it('reassembles split multibyte characters', function (done) {

        // Accumulate everything the decoder emits.
        d.on("data", function(chunk){
          result = Buffer.concat([ result, chunk ]);
        });

        d.on("end", function(){
          result.toString("utf-8").should.eql('慶');
          done();
        });

        // write '慶' in euc-jp split across chunks
        d.write(Buffer.from([0xB7]));
        d.write(Buffer.from([0xC4]));
        d.end();
      });

    });

    describe('with encoding = gb18030', function() {

      var d,
          result = Buffer.allocUnsafe(0);

      before(function(done) {
        d = decoder('gb18030');
        done();
      });

      it('reassembles split multibyte characters', function (done) {

        // Accumulate everything the decoder emits.
        d.on("data", function(chunk){
          result = Buffer.concat([ result, chunk ]);
        });

        d.on("end", function(){
          result.toString("utf-8").should.eql('慶');
          done();
        });

        // write '慶' in gb18030 split across chunks
        d.write(Buffer.from([0x91]));
        d.write(Buffer.from([0x63]));
        d.end();
      });

    });

  });

});