1
0

utf.js 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. /* utf.js - UTF-8 <=> UTF-16 conversion
  2. *
  3. * Copyright (C) 1999 Masanao Izumo <iz@onicos.co.jp>
  4. * Version: 1.0
  5. * LastModified: Dec 25 1999
  6. * This library is free. You can redistribute it and/or modify it.
  7. */
  8. /*
  9. * Interfaces:
  10. * utf8 = utf16to8(utf16);
  11. * utf16 = utf8to16(utf8);
  12. */
  13. function utf16to8(str) {
  14. var out, i, len, c;
  15. out = "";
  16. len = str.length;
  17. for(i = 0; i < len; i++) {
  18. c = str.charCodeAt(i);
  19. if ((c >= 0x0001) && (c <= 0x007F)) {
  20. out += str.charAt(i);
  21. } else if (c > 0x07FF) {
  22. out += String.fromCharCode(0xE0 | ((c >> 12) & 0x0F));
  23. out += String.fromCharCode(0x80 | ((c >> 6) & 0x3F));
  24. out += String.fromCharCode(0x80 | ((c >> 0) & 0x3F));
  25. } else {
  26. out += String.fromCharCode(0xC0 | ((c >> 6) & 0x1F));
  27. out += String.fromCharCode(0x80 | ((c >> 0) & 0x3F));
  28. }
  29. }
  30. return out;
  31. }
  32. function utf8to16(str) {
  33. var out, i, len, c;
  34. var char2, char3;
  35. out = "";
  36. len = str.length;
  37. i = 0;
  38. while(i < len) {
  39. c = str.charCodeAt(i++);
  40. switch(c >> 4)
  41. {
  42. case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
  43. // 0xxxxxxx
  44. out += str.charAt(i-1);
  45. break;
  46. case 12: case 13:
  47. // 110x xxxx 10xx xxxx
  48. char2 = str.charCodeAt(i++);
  49. out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
  50. break;
  51. case 14:
  52. // 1110 xxxx 10xx xxxx 10xx xxxx
  53. char2 = str.charCodeAt(i++);
  54. char3 = str.charCodeAt(i++);
  55. out += String.fromCharCode(((c & 0x0F) << 12) |
  56. ((char2 & 0x3F) << 6) |
  57. ((char3 & 0x3F) << 0));
  58. break;
  59. }
  60. }
  61. return out;
  62. }