generate-singlebyte.js 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. var fs = require("fs");
  2. var Iconv = require("iconv").Iconv;
  // Families of single-byte encodings to generate.
  // Each family lists its members in `encodings`. When a family has a
  // `convert` function, it maps a member to a descriptor of the form
  // { name: <string>, aliases: [<string>, ...] }; families without `convert`
  // list ready-to-use encoding names directly.
  var encodingFamilies = [
    {
      // Windows code pages
      encodings: [1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258],
      convert: function(cp) {
        return {
          name: "windows-" + cp,
          aliases: ["win" + cp, "cp" + cp, "" + cp],
        };
      },
    },
    {
      // ISO-8859 code pages
      encodings: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16],
      convert: function(i) {
        // Numeric code page alias is derived as 28590 + part number.
        var codePage = 28590 + i;
        return {
          name: "iso-8859-" + i,
          // Both aliases are strings, matching the other families
          // (the bare number used to be emitted as a Number).
          aliases: ["cp" + codePage, "" + codePage],
        };
      },
    },
    {
      // IBM/DOS code pages
      encodings: [437, 737, 775, 850, 852, 855, 857, 858, 860, 861, 862, 863, 864, 865, 866, 869],
      convert: function(cp) {
        return {
          name: "CP" + cp,
          aliases: ["ibm" + cp, "" + cp],
        };
      },
    },
    {
      // Macintosh code pages
      encodings: ["macCroatian", "macCyrillic", "macGreek",
                  "macIceland", "macRoman", "macRomania",
                  "macThai", "macTurkish", "macUkraine"],
    },
    {
      // KOI8 code pages
      encodings: ["KOI8-R", "KOI8-U"],
    },
  ];
  // Table of generated encodings, keyed by normalized name.
  // It starts out holding only hand-written aliases, where a string value is
  // the key of the encoding the alias points to.
  var encodings = {
    // Aliases.
    "ascii8bit": "ascii",
    "usascii": "ascii",

    "latin1": "iso88591",
    "latin2": "iso88592",
    "latin3": "iso88593",
    "latin4": "iso88594",
    "latin6": "iso885910",
    "latin7": "iso885913",
    "latin8": "iso885914",
    "latin9": "iso885915",
    "latin10": "iso885916",

    // Fixed: this used to point at the misspelled, non-existent "iso88951".
    "cp819": "iso88591",

    "arabic": "iso88596",
    "arabic8": "iso88596",
    "greek": "iso88597",
    "greek8": "iso88597",
    "hebrew": "iso88598",
    "hebrew8": "iso88598",
    "turkish": "iso88599",
    "turkish8": "iso88599",
    "thai": "iso885911",
    "thai8": "iso885911",
    "tis620": "iso885911",
    "windows874": "iso885911",
    "win874": "iso885911",
    "cp874": "iso885911",
    "874": "iso885911",
    "celtic": "iso885914",
    "celtic8": "iso885914",

    "cp20866": "koi8r",
    "20866": "koi8r",
    "ibm878": "koi8r",
    "cp21866": "koi8u",
    "21866": "koi8u",
    "ibm1168": "koi8u",
  };
  // Add all encodings from encodingFamilies.
  // Every family member becomes a "singlebyte" entry keyed by its normalized
  // name (dashes and underscores removed, lower-cased); each alias of the
  // member is then stored as a string pointing at that key.
  encodingFamilies.forEach(function(family) {
    family.encodings.forEach(function(member) {
      // Families with a convert() hook turn the raw member into a descriptor.
      var descriptor = family.convert ? family.convert(member) : member;

      // Descriptors carry the iconv name in `.name`; plain strings are the name.
      var iconvName = descriptor.name || descriptor;
      var key = iconvName.replace(/[-_]/g, "").toLowerCase();

      encodings[key] = {
        type: "singlebyte",
        chars: generateCharsString(iconvName)
      };

      var aliasList = descriptor.aliases || [];
      for (var idx = 0; idx < aliasList.length; idx++) {
        encodings[aliasList[idx]] = key;
      }
    });
  });
  // Write encodings.
  // The table is serialized as a CommonJS module (one-space JSON indent);
  // the output path is resolved relative to the current working directory.
  var singlebyteModuleSource = "module.exports = " + JSON.stringify(encodings, undefined, " ") + ";";
  fs.writeFileSync("encodings/singlebyte.js", singlebyteModuleSource);
  /**
   * Builds the character table for the upper half (bytes 0x80-0xFF) of a
   * single-byte encoding by converting each byte to UTF-8 with Iconv.
   *
   * @param {string} encoding - Encoding name handed to the Iconv constructor.
   * @returns {string} 128 characters; the character at index i is the decoding
   *     of byte 0x80 + i, or U+FFFD when the conversion fails with EILSEQ.
   * @throws {Error} If a byte converts to anything other than exactly one
   *     UTF-16 code unit, or if Iconv fails with any error other than EILSEQ.
   */
  function generateCharsString(encoding) {
    console.log("Generate encoding for " + encoding);
    var iconvToUtf8 = new Iconv(encoding, "UTF-8");
    var chars = "";
    for (var b = 0x80; b < 0x100; b++) {
      var convertedChar;
      try {
        // Buffer.from replaces the deprecated `new Buffer(array)` constructor.
        convertedChar = iconvToUtf8.convert(Buffer.from([b])).toString();
      } catch (exception) {
        // EILSEQ is treated as "byte has no mapping" (presumably how the iconv
        // binding reports an illegal sequence); anything else is a real failure.
        if (exception.code !== "EILSEQ") {
          throw exception;
        }
        convertedChar = "\ufffd";
      }
      // Checked outside the try so this error is not routed through the catch.
      if (convertedChar.length !== 1) {
        throw new Error("Single-byte encoding error: Must return single char.");
      }
      chars += convertedChar;
    }
    return chars;
  }