// string-punycode-to-ascii.js
// based on https://github.com/bestiejs/punycode.js/blob/master/punycode.js
  var uncurryThis = require('../internals/function-uncurry-this');
  // Punycode (Bootstring) parameters; the values match RFC 3492, section 5.
  var maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1
  var base = 36;
  var tMin = 1;
  var tMax = 26;
  var skew = 38;
  var damp = 700;
  var initialBias = 72;
  var initialN = 128; // 0x80
  var delimiter = '-'; // '\x2D'
  // ASCII spans U+0000..U+007F, so U+007F (DEL) must not count as non-ASCII.
  var regexNonASCII = /[^\0-\u007F]/; // non-ASCII chars
  var regexSeparators = /[.\u3002\uFF0E\uFF61]/g; // RFC 3490 separators
  var OVERFLOW_ERROR = 'Overflow: input needs wider integers to process';
  // Precomputed because it is used on every bias adaptation step.
  var baseMinusTMin = base - tMin;
  // Built-ins captured once at module load. The prototype methods are passed
  // through `uncurryThis`, and the rest of this file calls them with the
  // receiver as the first argument, e.g. `push(array, item)`.
  var $RangeError = RangeError;
  var exec = uncurryThis(regexSeparators.exec);
  var floor = Math.floor;
  var fromCharCode = String.fromCharCode;
  var charCodeAt = uncurryThis(''.charCodeAt);
  var join = uncurryThis([].join);
  var push = uncurryThis([].push);
  var replace = uncurryThis(''.replace);
  var split = uncurryThis(''.split);
  var toLowerCase = uncurryThis(''.toLowerCase);
  /**
   * Creates an array containing the numeric code points of each Unicode
   * character in the string. JavaScript strings expose UTF-16 code units, so
   * a high surrogate immediately followed by a low surrogate is merged into
   * the single code point the pair encodes. Unpaired surrogates are passed
   * through unchanged.
   *
   * @param {string} string - The string to decode.
   * @returns {number[]} The code points, in order of appearance.
   */
  var ucs2decode = function (string) {
    var output = [];
    var counter = 0;
    var length = string.length;
    while (counter < length) {
      var value = charCodeAt(string, counter++);
      if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
        // It's a high surrogate, and there is a next character.
        var extra = charCodeAt(string, counter++);
        if ((extra & 0xFC00) === 0xDC00) { // Low surrogate.
          push(output, ((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
        } else {
          // It's an unmatched surrogate; only append this code unit, in case the
          // next code unit is the high surrogate of a surrogate pair.
          push(output, value);
          counter--;
        }
      } else {
        push(output, value);
      }
    }
    return output;
  };
  /**
   * Converts a digit/integer into a basic code point.
   *
   * @param {number} digit - A Punycode digit; callers pass values in 0..35.
   * @returns {number} The code point of the lowercase ASCII letter or the
   * decimal digit that represents `digit`.
   */
  var digitToBasic = function (digit) {
    //  0..25 map to ASCII a..z (97..122); output is always lowercase
    // 26..35 map to ASCII 0..9 (48..57)
    return digit < 26 ? digit + 97 : digit + 22;
  };
  /**
   * Bias adaptation function as per section 3.4 of RFC 3492.
   * https://tools.ietf.org/html/rfc3492#section-3.4
   *
   * @param {number} delta - The delta that was just encoded.
   * @param {number} numPoints - Number of code points handled so far,
   * including the one just encoded.
   * @param {boolean} firstTime - Whether this is the first adaptation; the
   * first delta is divided by `damp` instead of being halved.
   * @returns {number} The new bias.
   */
  var adapt = function (delta, numPoints, firstTime) {
    var k = 0;
    // Loop-invariant threshold: ((base - tMin) * tMax) / 2.
    var threshold = baseMinusTMin * tMax >> 1;
    var scaled = firstTime ? floor(delta / damp) : delta >> 1;
    scaled += floor(scaled / numPoints);
    while (scaled > threshold) {
      scaled = floor(scaled / baseMinusTMin);
      k += base;
    }
    return floor(k + (baseMinusTMin + 1) * scaled / (scaled + skew));
  };
  /**
   * Converts a string of Unicode symbols (e.g. a domain name label) to a
   * Punycode string of ASCII-only symbols.
   *
   * @param {string} input - The string of Unicode symbols.
   * @returns {string} The Punycode form of `input`, without any `xn--` prefix.
   * @throws {RangeError} When `delta` would grow past `maxInt`.
   */
  var encode = function (input) {
    var output = [];

    // Convert the input in UCS-2 to an array of Unicode code points.
    var codePoints = ucs2decode(input);

    // Cache the length.
    var inputLength = codePoints.length;

    // Initialize the state.
    var n = initialN;
    var delta = 0;
    var bias = initialBias;
    var i, currentValue;

    // Handle the basic code points.
    for (i = 0; i < inputLength; i++) {
      currentValue = codePoints[i];
      if (currentValue < 0x80) {
        push(output, fromCharCode(currentValue));
      }
    }

    var basicLength = output.length; // number of basic code points.
    var handledCPCount = basicLength; // number of code points that have been handled;

    // Finish the basic string with a delimiter unless it's empty.
    if (basicLength) {
      push(output, delimiter);
    }

    // Main encoding loop:
    while (handledCPCount < inputLength) {
      // All non-basic code points < n have been handled already. Find the next larger one:
      var m = maxInt;
      for (i = 0; i < inputLength; i++) {
        currentValue = codePoints[i];
        if (currentValue >= n && currentValue < m) {
          m = currentValue;
        }
      }

      // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>, but guard against overflow.
      var handledCPCountPlusOne = handledCPCount + 1;
      if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
        throw new $RangeError(OVERFLOW_ERROR);
      }

      delta += (m - n) * handledCPCountPlusOne;
      n = m;

      for (i = 0; i < inputLength; i++) {
        currentValue = codePoints[i];
        // Every code point below `n` that is passed over costs one delta step.
        if (currentValue < n && ++delta > maxInt) {
          throw new $RangeError(OVERFLOW_ERROR);
        }
        if (currentValue === n) {
          // Represent delta as a generalized variable-length integer.
          var q = delta;
          var k = base;
          while (true) {
            // Threshold for this digit position, clamped to [tMin, tMax].
            var t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
            if (q < t) break;
            var qMinusT = q - t;
            var baseMinusT = base - t;
            push(output, fromCharCode(digitToBasic(t + qMinusT % baseMinusT)));
            q = floor(qMinusT / baseMinusT);
            k += base;
          }

          // The final (most significant) digit terminates the integer.
          push(output, fromCharCode(digitToBasic(q)));
          bias = adapt(delta, handledCPCountPlusOne, handledCPCount === basicLength);
          delta = 0;
          handledCPCount++;
        }
      }

      delta++;
      n++;
    }
    return join(output, '');
  };
  152. module.exports = function (input) {
  153. var encoded = [];
  154. var labels = split(replace(toLowerCase(input), regexSeparators, '\u002E'), '.');
  155. var i, label;
  156. for (i = 0; i < labels.length; i++) {
  157. label = labels[i];
  158. push(encoded, exec(regexNonASCII, label) ? 'xn--' + encode(label) : label);
  159. }
  160. return join(encoded, '.');
  161. };