Back to index

lightning-sunbird  0.9+nobinonly
nsIDNService.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is mozilla.org code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 2002
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Naoki Hotta <nhotta@netscape.com> (original author)
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either the GNU General Public License Version 2 or later (the "GPL"), or
00027  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 
00039 #include "nsIDNService.h"
00040 #include "nsReadableUtils.h"
00041 #include "nsCRT.h"
00042 #include "nsUnicharUtils.h"
00043 #include "nsIServiceManager.h"
00044 #include "nsIPrefService.h"
00045 #include "nsIPrefBranch.h"
00046 #include "nsIPrefBranch2.h"
00047 #include "nsIObserverService.h"
00048 #include "nsISupportsPrimitives.h"
00049 #include "punycode.h"
00050 
00051 //-----------------------------------------------------------------------------
00052 // RFC 1034 - 3.1. Name space specifications and terminology
00053 static const PRUint32 kMaxDNSNodeLen = 63;
00054 
00055 //-----------------------------------------------------------------------------
00056 
00057 #define NS_NET_PREF_IDNTESTBED      "network.IDN_testbed"
00058 #define NS_NET_PREF_IDNPREFIX       "network.IDN_prefix"
00059 #define NS_NET_PREF_IDNBLACKLIST    "network.IDN.blacklist_chars"
00060 
00061 inline PRBool isOnlySafeChars(const nsAFlatString& in,
00062                               const nsAFlatString& blacklist)
00063 {
00064   return (blacklist.IsEmpty() ||
00065           in.FindCharInSet(blacklist) == kNotFound);
00066 }
00067 
00068 //-----------------------------------------------------------------------------
00069 // nsIDNService
00070 //-----------------------------------------------------------------------------
00071 
/* Implementation file */
// Macro invocation naming nsIDNService and the three interfaces it exposes
// (nsIIDNService, nsIObserver, nsISupportsWeakReference).  Presumably this
// expands to the class's nsISupports methods using the thread-safe variant
// of the macro family -- confirm against the macro's definition.
NS_IMPL_THREADSAFE_ISUPPORTS3(nsIDNService,
                              nsIIDNService,
                              nsIObserver,
                              nsISupportsWeakReference)
00077 
00078 nsresult nsIDNService::Init()
00079 {
00080   nsCOMPtr<nsIPrefBranch2> prefInternal(do_GetService(NS_PREFSERVICE_CONTRACTID));
00081   if (prefInternal) {
00082     prefInternal->AddObserver(NS_NET_PREF_IDNTESTBED, this, PR_TRUE); 
00083     prefInternal->AddObserver(NS_NET_PREF_IDNPREFIX, this, PR_TRUE); 
00084     prefInternal->AddObserver(NS_NET_PREF_IDNBLACKLIST, this, PR_TRUE);
00085     prefsChanged(prefInternal, nsnull);
00086   }
00087   return NS_OK;
00088 }
00089 
00090 NS_IMETHODIMP nsIDNService::Observe(nsISupports *aSubject,
00091                                     const char *aTopic,
00092                                     const PRUnichar *aData)
00093 {
00094   if (!strcmp(aTopic, NS_PREFBRANCH_PREFCHANGE_TOPIC_ID)) {
00095     nsCOMPtr<nsIPrefBranch> prefBranch( do_QueryInterface(aSubject) );
00096     if (prefBranch)
00097       prefsChanged(prefBranch, aData);
00098   }
00099   return NS_OK;
00100 }
00101 
00102 void nsIDNService::prefsChanged(nsIPrefBranch *prefBranch, const PRUnichar *pref)
00103 {
00104   if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNTESTBED).Equals(pref)) {
00105     PRBool val;
00106     if (NS_SUCCEEDED(prefBranch->GetBoolPref(NS_NET_PREF_IDNTESTBED, &val)))
00107       mMultilingualTestBed = val;
00108   }
00109   if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNPREFIX).Equals(pref)) {
00110     nsXPIDLCString prefix;
00111     nsresult rv = prefBranch->GetCharPref(NS_NET_PREF_IDNPREFIX, getter_Copies(prefix));
00112     if (NS_SUCCEEDED(rv) && prefix.Length() <= kACEPrefixLen)
00113       PL_strncpyz(nsIDNService::mACEPrefix, prefix.get(), kACEPrefixLen + 1);
00114   }
00115   if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNBLACKLIST).Equals(pref)) {
00116     nsCOMPtr<nsISupportsString> blacklist;
00117     nsresult rv = prefBranch->GetComplexValue(NS_NET_PREF_IDNBLACKLIST,
00118                                               NS_GET_IID(nsISupportsString),
00119                                               getter_AddRefs(blacklist));
00120     if (NS_SUCCEEDED(rv))
00121       blacklist->ToString(getter_Copies(mIDNBlacklist));
00122     else
00123       mIDNBlacklist.Truncate();
00124   }
00125 }
00126 
00127 nsIDNService::nsIDNService()
00128 {
00129   nsresult rv;
00130 
00131   // initialize to the official prefix (RFC 3490 "5. ACE prefix")
00132   const char kIDNSPrefix[] = "xn--";
00133   strcpy(mACEPrefix, kIDNSPrefix);
00134 
00135   mMultilingualTestBed = PR_FALSE;
00136 
00137   if (idn_success != idn_nameprep_create(NULL, &mNamePrepHandle))
00138     mNamePrepHandle = nsnull;
00139 
00140   mNormalizer = do_GetService(NS_UNICODE_NORMALIZER_CONTRACTID, &rv);
00141   if (NS_FAILED(rv))
00142     mNormalizer = nsnull;
00143   /* member initializers and constructor code */
00144 }
00145 
00146 nsIDNService::~nsIDNService()
00147 {
00148   idn_nameprep_destroy(mNamePrepHandle);
00149 }
00150 
00151 /* ACString ConvertUTF8toACE (in AUTF8String input); */
00152 NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACString & ace)
00153 {
00154   // protect against bogus input
00155   NS_ENSURE_TRUE(IsUTF8(input), NS_ERROR_UNEXPECTED);
00156 
00157   nsresult rv;
00158   NS_ConvertUTF8toUCS2 ustr(input);
00159 
00160   // map ideographic period to ASCII period etc.
00161   normalizeFullStops(ustr);
00162 
00163 
00164   PRUint32 len, offset;
00165   len = 0;
00166   offset = 0;
00167   nsCAutoString encodedBuf;
00168 
00169   nsAString::const_iterator start, end;
00170   ustr.BeginReading(start); 
00171   ustr.EndReading(end); 
00172   ace.Truncate();
00173 
00174   // encode nodes if non ASCII
00175   while (start != end) {
00176     len++;
00177     if (*start++ == (PRUnichar)'.') {
00178       rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf);
00179       NS_ENSURE_SUCCESS(rv, rv);
00180 
00181       ace.Append(encodedBuf);
00182       ace.Append('.');
00183       offset += len;
00184       len = 0;
00185     }
00186   }
00187 
00188   // add extra node for multilingual test bed
00189   if (mMultilingualTestBed)
00190     ace.AppendLiteral("mltbd.");
00191   // encode the last node if non ASCII
00192   if (len) {
00193     rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf);
00194     NS_ENSURE_SUCCESS(rv, rv);
00195 
00196     ace.Append(encodedBuf);
00197   }
00198 
00199   return NS_OK;
00200 }
00201 
00202 /* [noscript] string ConvertACEtoUTF8 (in string input); */
00203 NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString & input, nsACString & _retval)
00204 {
00205   // RFC 3490 - 4.2 ToUnicode
00206   // ToUnicode never fails.  If any step fails, then the original input
00207   // sequence is returned immediately in that step.
00208 
00209   if (!IsASCII(input)) {
00210     _retval.Assign(input);
00211     return NS_OK;
00212   }
00213   
00214   PRUint32 len = 0, offset = 0;
00215   nsCAutoString decodedBuf;
00216 
00217   nsACString::const_iterator start, end;
00218   input.BeginReading(start); 
00219   input.EndReading(end); 
00220   _retval.Truncate();
00221 
00222   // loop and decode nodes
00223   while (start != end) {
00224     len++;
00225     if (*start++ == '.') {
00226       if (NS_FAILED(decodeACE(Substring(input, offset, len - 1), decodedBuf))) {
00227         _retval.Assign(input);
00228         return NS_OK;
00229       }
00230 
00231       _retval.Append(decodedBuf);
00232       _retval.Append('.');
00233       offset += len;
00234       len = 0;
00235     }
00236   }
00237   // decode the last node
00238   if (len) {
00239     if (NS_FAILED(decodeACE(Substring(input, offset, len), decodedBuf)))
00240       _retval.Assign(input);
00241     else
00242       _retval.Append(decodedBuf);
00243   }
00244 
00245   return NS_OK;
00246 }
00247 
00248 /* boolean encodedInACE (in ACString input); */
00249 NS_IMETHODIMP nsIDNService::IsACE(const nsACString & input, PRBool *_retval)
00250 {
00251   nsACString::const_iterator begin;
00252   input.BeginReading(begin);
00253 
00254   const char *data = begin.get();
00255   PRUint32 dataLen = begin.size_forward();
00256 
00257   // look for the ACE prefix in the input string.  it may occur
00258   // at the beginning of any segment in the domain name.  for
00259   // example: "www.xn--ENCODED.com"
00260 
00261   const char *p = PL_strncasestr(data, mACEPrefix, dataLen);
00262 
00263   *_retval = p && (p == data || *(p - 1) == '.');
00264   return NS_OK;
00265 }
00266 
00267 NS_IMETHODIMP nsIDNService::Normalize(const nsACString & input, nsACString & output)
00268 {
00269   // protect against bogus input
00270   NS_ENSURE_TRUE(IsUTF8(input), NS_ERROR_UNEXPECTED);
00271 
00272   NS_ConvertUTF8toUTF16 inUTF16(input);
00273   normalizeFullStops(inUTF16);
00274 
00275   nsAutoString outUTF16;
00276   nsresult rv = stringPrep(inUTF16, outUTF16);
00277   if (NS_FAILED(rv))
00278     return rv;
00279 
00280   CopyUTF16toUTF8(outUTF16, output);
00281   if (!isOnlySafeChars(outUTF16, mIDNBlacklist))
00282     return ConvertUTF8toACE(output, output);
00283 
00284   return NS_OK;
00285 }
00286 
00287 //-----------------------------------------------------------------------------
00288 
00289 static void utf16ToUcs4(const nsAString& in, PRUint32 *out, PRUint32 outBufLen, PRUint32 *outLen)
00290 {
00291   PRUint32 i = 0;
00292   nsAString::const_iterator start, end;
00293   in.BeginReading(start); 
00294   in.EndReading(end); 
00295 
00296   while (start != end) {
00297     PRUnichar curChar;
00298 
00299     curChar= *start++;
00300 
00301     if (start != end &&
00302         IS_HIGH_SURROGATE(curChar) && 
00303         IS_LOW_SURROGATE(*start)) {
00304       out[i] = SURROGATE_TO_UCS4(curChar, *start);
00305       ++start;
00306     }
00307     else
00308       out[i] = curChar;
00309 
00310     i++;
00311     if (i >= outBufLen) {
00312       NS_ERROR("input too big, the result truncated");
00313       out[outBufLen-1] = (PRUint32)'\0';
00314       *outLen = i;
00315       return;
00316     }
00317   }
00318   out[i] = (PRUint32)'\0';
00319   *outLen = i;
00320 }
00321 
00322 static void ucs4toUtf16(const PRUint32 *in, nsAString& out)
00323 {
00324   while (*in) {
00325     if (!IS_IN_BMP(*in)) {
00326       out.Append((PRUnichar) H_SURROGATE(*in));
00327       out.Append((PRUnichar) L_SURROGATE(*in));
00328     }
00329     else
00330       out.Append((PRUnichar) *in);
00331     in++;
00332   }
00333 }
00334 
00335 static nsresult punycode(const char* prefix, const nsAString& in, nsACString& out)
00336 {
00337   PRUint32 ucs4Buf[kMaxDNSNodeLen + 1];
00338   PRUint32 ucs4Len;
00339   utf16ToUcs4(in, ucs4Buf, kMaxDNSNodeLen, &ucs4Len);
00340 
00341   // need maximum 20 bits to encode 16 bit Unicode character
00342   // (include null terminator)
00343   const PRUint32 kEncodedBufSize = kMaxDNSNodeLen * 20 / 8 + 1 + 1;  
00344   char encodedBuf[kEncodedBufSize];
00345   punycode_uint encodedLength = kEncodedBufSize;
00346 
00347   enum punycode_status status = punycode_encode(ucs4Len,
00348                                                 ucs4Buf,
00349                                                 nsnull,
00350                                                 &encodedLength,
00351                                                 encodedBuf);
00352 
00353   if (punycode_success != status ||
00354       encodedLength >= kEncodedBufSize)
00355     return NS_ERROR_FAILURE;
00356 
00357   encodedBuf[encodedLength] = '\0';
00358   out.Assign(nsDependentCString(prefix) + nsDependentCString(encodedBuf));
00359 
00360   return NS_OK;
00361 }
00362 
00363 static nsresult encodeToRACE(const char* prefix, const nsAString& in, nsACString& out)
00364 {
00365   // need maximum 20 bits to encode 16 bit Unicode character
00366   // (include null terminator)
00367   const PRUint32 kEncodedBufSize = kMaxDNSNodeLen * 20 / 8 + 1 + 1;  
00368 
00369   // set up a work buffer for RACE encoder
00370   PRUnichar temp[kMaxDNSNodeLen + 2];
00371   temp[0] = 0xFFFF;   // set a place holder (to be filled by get_compress_mode)
00372   temp[in.Length() + 1] = (PRUnichar)'\0';
00373 
00374   nsAString::const_iterator start, end;
00375   in.BeginReading(start); 
00376   in.EndReading(end);
00377   
00378   for (PRUint32 i = 1; start != end; i++)
00379     temp[i] = *start++;
00380 
00381   // encode nodes if non ASCII
00382 
00383   char encodedBuf[kEncodedBufSize];
00384   idn_result_t result = race_compress_encode((const unsigned short *) temp, 
00385                                              get_compress_mode((unsigned short *) temp + 1), 
00386                                              encodedBuf, kEncodedBufSize);
00387   if (idn_success != result)
00388     return NS_ERROR_FAILURE;
00389 
00390   out.Assign(prefix);
00391   out.Append(encodedBuf);
00392 
00393   return NS_OK;
00394 }
00395 
00396 // RFC 3454
00397 //
00398 // 1) Map -- For each character in the input, check if it has a mapping
00399 // and, if so, replace it with its mapping. This is described in section 3.
00400 //
00401 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
00402 // normalization. This is described in section 4.
00403 //
00404 // 3) Prohibit -- Check for any characters that are not allowed in the
00405 // output. If any are found, return an error. This is described in section
00406 // 5.
00407 //
00408 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
00409 // are found, make sure that the whole string satisfies the requirements
00410 // for bidirectional strings. If the string does not satisfy the requirements
00411 // for bidirectional strings, return an error. This is described in section 6.
00412 //
00413 nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out)
00414 {
00415   if (!mNamePrepHandle || !mNormalizer)
00416     return NS_ERROR_FAILURE;
00417 
00418   nsresult rv = NS_OK;
00419   PRUint32 ucs4Buf[kMaxDNSNodeLen + 1];
00420   PRUint32 ucs4Len;
00421   utf16ToUcs4(in, ucs4Buf, kMaxDNSNodeLen, &ucs4Len);
00422 
00423   // map
00424   idn_result_t idn_err;
00425 
00426   PRUint32 namePrepBuf[kMaxDNSNodeLen * 3];   // map up to three characters
00427   idn_err = idn_nameprep_map(mNamePrepHandle, (const PRUint32 *) ucs4Buf,
00428                                    (PRUint32 *) namePrepBuf, kMaxDNSNodeLen * 3);
00429   NS_ENSURE_TRUE(idn_err == idn_success, NS_ERROR_FAILURE);
00430 
00431   nsAutoString namePrepStr;
00432   ucs4toUtf16(namePrepBuf, namePrepStr);
00433   if (namePrepStr.Length() >= kMaxDNSNodeLen)
00434     return NS_ERROR_FAILURE;
00435 
00436   // normalize
00437   nsAutoString normlizedStr;
00438   rv = mNormalizer->NormalizeUnicodeNFKC(namePrepStr, normlizedStr);
00439   if (normlizedStr.Length() >= kMaxDNSNodeLen)
00440     return NS_ERROR_FAILURE;
00441 
00442   // prohibit
00443   const PRUint32 *found = nsnull;
00444   idn_err = idn_nameprep_isprohibited(mNamePrepHandle, 
00445                                       (const PRUint32 *) ucs4Buf, &found);
00446   if (idn_err != idn_success || found)
00447     return NS_ERROR_FAILURE;
00448 
00449   // check bidi
00450   idn_err = idn_nameprep_isvalidbidi(mNamePrepHandle, 
00451                                      (const PRUint32 *) ucs4Buf, &found);
00452   if (idn_err != idn_success || found)
00453     return NS_ERROR_FAILURE;
00454 
00455   // set the result string
00456   out.Assign(normlizedStr);
00457 
00458   return rv;
00459 }
00460 
00461 nsresult nsIDNService::encodeToACE(const nsAString& in, nsACString& out)
00462 {
00463   // RACE encode is supported for existing testing environment
00464   if (!strcmp("bq--", mACEPrefix))
00465     return encodeToRACE(mACEPrefix, in, out);
00466   
00467   // use punycoce
00468   return punycode(mACEPrefix, in, out);
00469 }
00470 
00471 nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out)
00472 {
00473   nsresult rv = NS_OK;
00474 
00475   out.Truncate();
00476 
00477   if (in.Length() > kMaxDNSNodeLen) {
00478     NS_ERROR("IDN node too large");
00479     return NS_ERROR_FAILURE;
00480   }
00481 
00482   if (IsASCII(in))
00483     CopyUCS2toASCII(in, out);
00484   else {
00485     nsAutoString strPrep;
00486     rv = stringPrep(in, strPrep);
00487     if (NS_SUCCEEDED(rv)) {
00488       if (IsASCII(strPrep))
00489         CopyUCS2toASCII(strPrep, out);
00490       else
00491         rv = encodeToACE(strPrep, out);
00492     }
00493   }
00494 
00495   if (out.Length() > kMaxDNSNodeLen) {
00496     NS_ERROR("IDN node too large");
00497     return NS_ERROR_FAILURE;
00498   }
00499 
00500   return rv;
00501 }
00502 
00503 // RFC 3490
00504 // 1) Whenever dots are used as label separators, the following characters
00505 //    MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
00506 //    stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
00507 //    stop).
00508 
00509 void nsIDNService::normalizeFullStops(nsAString& s)
00510 {
00511   nsAString::const_iterator start, end;
00512   s.BeginReading(start); 
00513   s.EndReading(end); 
00514   PRInt32 index = 0;
00515 
00516   while (start != end) {
00517     switch (*start) {
00518       case 0x3002:
00519       case 0xFF0E:
00520       case 0xFF61:
00521         s.Replace(index, 1, NS_LITERAL_STRING("."));
00522         break;
00523       default:
00524         break;
00525     }
00526     start++;
00527     index++;
00528   }
00529 }
00530 
00531 nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out)
00532 {
00533   PRBool isAce;
00534   IsACE(in, &isAce);
00535   if (!isAce) {
00536     out.Assign(in);
00537     return NS_OK;
00538   }
00539 
00540   // RFC 3490 - 4.2 ToUnicode
00541   // The ToUnicode output never contains more code points than its input.
00542   punycode_uint output_length = in.Length() - kACEPrefixLen + 1;
00543   punycode_uint *output = new punycode_uint[output_length];
00544   NS_ENSURE_TRUE(output, NS_ERROR_OUT_OF_MEMORY);
00545 
00546   enum punycode_status status = punycode_decode(in.Length() - kACEPrefixLen,
00547                                                 PromiseFlatCString(in).get() + kACEPrefixLen,
00548                                                 &output_length,
00549                                                 output,
00550                                                 nsnull);
00551   if (status != punycode_success) {
00552     delete [] output;
00553     return NS_ERROR_FAILURE;
00554   }
00555 
00556   // UCS4 -> UTF8
00557   output[output_length] = 0;
00558   nsAutoString utf16;
00559   ucs4toUtf16(output, utf16);
00560   delete [] output;
00561   if (!isOnlySafeChars(utf16, mIDNBlacklist))
00562     return NS_ERROR_FAILURE;
00563   CopyUTF16toUTF8(utf16, out);
00564 
00565   // Validation: encode back to ACE and compare the strings
00566   nsCAutoString ace;
00567   nsresult rv = ConvertUTF8toACE(out, ace);
00568   NS_ENSURE_SUCCESS(rv, rv);
00569 
00570   if (!ace.Equals(in, nsCaseInsensitiveCStringComparator()))
00571     return NS_ERROR_FAILURE;
00572 
00573   return NS_OK;
00574 }