url.cpp 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. #include "precomp_wasabi_bfc.h"
  2. #include "url.h"
  3. #include <bfc/wasabi_std.h>
  4. void Url::encode(StringW &dest, int use_plus_for_space, int encoding, int style)
  5. {
  6. if (dest.isempty()) return;
  7. StringW srcstr = dest;
  8. const wchar_t *src = srcstr;
  9. dest = NULL;
  10. /*
  11. if (encoding & URLENCODE_EXCLUDEHTTPPREFIX)
  12. if (!_wcsnicmp(src, L"http://", 7))
  13. {
  14. src += 7;
  15. dest += L"http://";
  16. }
  17. */
  18. while (src && *src)
  19. {
  20. int encode = 1;
  21. // if (encoding & URLENCODE_NOTHING) encode = 0;
  22. if ((encoding & URLENCODE_EXCLUDEALPHANUM)
  23. && (ISALPHA(*src) || ISDIGIT(*src)
  24. || *src == '_' || *src == '-' || *src == '.' || *src == '~')
  25. && *src < 128)
  26. encode = 0;
  27. // if ((encoding & URLENCODE_EXCLUDE_8BIT) && (*src > 127)) encode = 0;
  28. if ((encoding & URLENCODE_EXCLUDE_ABOVEEQ32) && (*src >= 32)) encode = 0;
  29. // if ((encoding & URLENCODE_ENCODESPACE) && *src == ' ') encode = 1;
  30. // if ((encoding & URLENCODE_ENCODEXML) && (*src == '<' || *src == '>' || *src == '&')) encode = 1;
  31. if ((*src == '&' && (style == URLENCODE_STYLE_ANDPOUND || style == URLENCODE_STYLE_ANDPOUNDX)) ||
  32. (*src == '%' && style == URLENCODE_STYLE_PERCENT)) encode = 1;
  33. if ((encoding & URLENCODE_EXCLUDESLASH) && (*src == '/' || *src == ':')) encode = 0;
  34. if (!encode)
  35. {
  36. dest += *src;
  37. }
  38. else if (use_plus_for_space && *src == ' ')
  39. {
  40. dest += '+';
  41. }
  42. else
  43. {
  44. switch (style)
  45. {
  46. case URLENCODE_STYLE_PERCENT:
  47. dest += StringPrintfW(L"%%%02X", (int) * src);
  48. break;
  49. case URLENCODE_STYLE_ANDPOUND:
  50. dest += StringPrintfW(L"&#%02d;", (int) * src);
  51. break;
  52. case URLENCODE_STYLE_ANDPOUNDX:
  53. dest += StringPrintfW(L"&#x%02X;", (int) * src);
  54. break;
  55. }
  56. }
  57. src++;
  58. }
  59. }
  60. void Url::encode(String &dest, int use_plus_for_space, int encoding, int style)
  61. {
  62. if (dest.isempty()) return;
  63. String srcstr = dest;
  64. const char *src = srcstr;
  65. dest = NULL;
  66. /*
  67. if (encoding & URLENCODE_EXCLUDEHTTPPREFIX)
  68. if (!_wcsnicmp(src, L"http://", 7))
  69. {
  70. src += 7;
  71. dest += L"http://";
  72. }
  73. */
  74. while (src && *src)
  75. {
  76. int encode = 1;
  77. // if (encoding & URLENCODE_NOTHING) encode = 0;
  78. if ((encoding & URLENCODE_EXCLUDEALPHANUM)
  79. && (ISALPHA(*src) || ISDIGIT(*src)
  80. || *src == '_' || *src == '-' || *src == '.' || *src == '~')
  81. && *(unsigned char *)src < 128)
  82. encode = 0;
  83. // if ((encoding & URLENCODE_EXCLUDE_8BIT) && (*src > 127)) encode = 0;
  84. if ((encoding & URLENCODE_EXCLUDE_ABOVEEQ32) && (*src >= 32)) encode = 0;
  85. // if ((encoding & URLENCODE_ENCODESPACE) && *src == ' ') encode = 1;
  86. // if ((encoding & URLENCODE_ENCODEXML) && (*src == '<' || *src == '>' || *src == '&')) encode = 1;
  87. if ((*src == '&' && (style == URLENCODE_STYLE_ANDPOUND || style == URLENCODE_STYLE_ANDPOUNDX)) ||
  88. (*src == '%' && style == URLENCODE_STYLE_PERCENT)) encode = 1;
  89. if ((encoding & URLENCODE_EXCLUDESLASH) && (*src == '/' || *src == ':')) encode = 0;
  90. if (!encode)
  91. {
  92. dest += *src;
  93. }
  94. else if (use_plus_for_space && *src == ' ')
  95. {
  96. dest += '+';
  97. }
  98. else
  99. {
  100. switch (style)
  101. {
  102. case URLENCODE_STYLE_PERCENT:
  103. dest += StringPrintf("%%%02X", (unsigned char)*src);
  104. break;
  105. case URLENCODE_STYLE_ANDPOUND:
  106. dest += StringPrintf("&#%02d;", (unsigned char)*src);
  107. break;
  108. case URLENCODE_STYLE_ANDPOUNDX:
  109. dest += StringPrintf("&#x%02X;", (unsigned char)*src);
  110. break;
  111. }
  112. }
  113. src++;
  114. }
  115. }
  116. void Url::decode(StringW &str, int use_plus_for_space)
  117. {
  118. if (str.isempty()) return;
  119. Url::decode(str.getNonConstVal());
  120. }
  // Converts one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
  // numeric value using bit arithmetic only. The caller is expected to have
  // verified that c is a hex digit; other input yields a meaningless value.
  static uint8_t quickhex(wchar_t c)
  {
    const int code = c;

    // '0'..'9' are 0x30..0x39, so bit 0x10 is set and clearing the 0x30 bits
    // leaves 0..9. 'A'..'F' (0x41..) and 'a'..'f' (0x61..) have bit 0x10 clear;
    // their low nibble is 1..6, so adding 9 gives 10..15.
    const bool digit_column = (code & 0x10) != 0;
    const int value = digit_column ? (code & ~0x30) : ((code & 0xF) + 9);

    return static_cast<uint8_t>(value);
  }
  133. static uint8_t DecodeEscape(const wchar_t *&str)
  134. {
  135. uint8_t a = quickhex(*++str);
  136. uint8_t b = quickhex(*++str);
  137. str++;
  138. return a * 16 + b;
  139. }
  140. static void DecodeEscapedUTF8(wchar_t *&output, const wchar_t *&input)
  141. {
  142. uint8_t utf8_data[1024] = {0}; // hopefully big enough!!
  143. int num_utf8_words=0;
  144. bool error=false;
  145. while (input && *input && *input == '%' && num_utf8_words < sizeof(utf8_data))
  146. {
  147. if (iswxdigit(input[1]) && iswxdigit(input[2]))
  148. {
  149. utf8_data[num_utf8_words++]=DecodeEscape(input);
  150. }
  151. else if (input[1] == '%')
  152. {
  153. input+=2;
  154. utf8_data[num_utf8_words++]='%';
  155. }
  156. else
  157. {
  158. error = true;
  159. break;
  160. }
  161. }
  162. int len = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)utf8_data, num_utf8_words, 0, 0);
  163. MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)utf8_data, num_utf8_words, output, len);
  164. output += len;
  165. if (error)
  166. {
  167. *output++ = *input++;
  168. }
  169. }
  170. // benski> We have the luxury of knowing that decoding will ALWAYS produce smaller strings
  171. // so we can do it in-place
  172. void Url::decode(wchar_t *str)
  173. {
  174. const wchar_t *itr = str;
  175. while (itr && *itr)
  176. {
  177. switch (*itr)
  178. {
  179. case '%':
  180. DecodeEscapedUTF8(str, itr);
  181. break;
  182. default:
  183. *str++ = *itr++;
  184. break;
  185. }
  186. }
  187. *str = 0;
  188. }