< Previous by Date Date Index Next by Date >
  Thread Index Next in Thread >

[reSIProcate] Uri.cxx and encoding




I've been working on Uri.cxx/Uri.hxx, implementing a lookup table for performance and also allowing the application developer to override the default encoding behaviour.

Does anyone have any comments on this?


Index: Uri.hxx
===================================================================
--- Uri.hxx     (revision 5867)
+++ Uri.hxx     (working copy)
@@ -1,11 +1,15 @@
#if !defined(RESIP_URI_HXX)
#define RESIP_URI_HXX

+#include <bitset>
#include <cassert>

#include "resip/stack/ParserCategory.hxx"
#include "rutil/HeapInstanceCounter.hxx"

+// One table entry per possible unsigned char value.
+#define URI_ENCODING_TABLE_SIZE 256
+// Characters left unescaped by default: alphanumerics, the RFC 2396 "mark"
+// characters -_.!~*'() and the additional characters &=+$,;?/
+// (the apostrophe needs no backslash inside a string literal, and "\(" is
+// not a valid escape sequence).
+#define URI_ENCODING_REGULAR_CHARS "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.!~*'()&=+$,;?/"
+
namespace resip
{
class SipMessage;
@@ -24,6 +28,15 @@
      //static Uri fromTel(const Uri&, const Data& host);  // deprecate...
      static Uri fromTel(const Uri&, const Uri& hostUri);

+      // Specify characters we would prefer not to escape (e.g. #),
+      // allows you to achieve behaviour which is not consistent
+      // with RFC 2396
+      static Data* getNonEncodedChars();
+      // Resets the set of characters to include only those specified:
+      static void setNonEncodedChars(const Data& chars);
+      // Adds to the already known set of characters:
+      static void addNonEncodedChars(const Data& chars);
+
      Data& host() {checkParsed(); return mHost;}
      const Data& host() const {checkParsed(); return mHost;}
      Data& user() {checkParsed(); return mUser;}
@@ -95,9 +108,16 @@
      // cache for IPV6 host comparison
      mutable Data mCanonicalHost;

+      // For looking up characters that don't require escaping
+      static bool shouldEscapeUserChar(unsigned char c);
+      static Data* mPlainTextChars;
+      static std::bitset<URI_ENCODING_TABLE_SIZE>* encodingTable;
+      static void updateEncodingTable();
+
   private:
      Data mEmbeddedHeadersText;
      SipMessage* mEmbeddedHeaders;
+
};

}
Index: Uri.cxx
===================================================================
--- Uri.cxx     (revision 5867)
+++ Uri.cxx     (working copy)
@@ -31,6 +31,48 @@
{
}

+Data* Uri::mPlainTextChars = NULL;
+std::bitset<URI_ENCODING_TABLE_SIZE>* Uri::encodingTable = NULL;
+
+// Returns a newly allocated copy of the characters currently exempt from
+// escaping.  The caller owns the returned Data and must delete it.
+// The result is empty while no character set has been installed
+// (mPlainTextChars is still NULL).
+Data* Uri::getNonEncodedChars() {
+   // Copy the Data object itself; passing the bare pointer to the
+   // constructor would not copy the character set.
+   if(mPlainTextChars != NULL)
+      return new Data(*mPlainTextChars);
+   return new Data();
+}
+
+// Replaces the set of characters that are never URL-escaped with exactly
+// those in `chars'.  Useful to prevent the escaping of the `#' character
+// when sent as part of a phone number.
+void Uri::setNonEncodedChars(const Data& chars) {
+   // Build the copy before releasing the old set so that `chars' may
+   // safely refer to the current set.
+   Data* replacement = new Data(chars);
+   delete mPlainTextChars;   // deleting NULL is a no-op
+   mPlainTextChars = replacement;
+   updateEncodingTable();
+}
+
+// Appends `chars' to the set of characters that are never URL-escaped.
+// If no set has been installed yet, the additions are made on top of
+// URI_ENCODING_REGULAR_CHARS instead of replacing it.
+void Uri::addNonEncodedChars(const Data& chars) {
+   if(mPlainTextChars == NULL)
+      mPlainTextChars = new Data(URI_ENCODING_REGULAR_CHARS);
+   Data* allChars = new Data(*mPlainTextChars + chars);
+   delete mPlainTextChars;
+   mPlainTextChars = allChars;
+   updateEncodingTable();
+}
+
+// Rebuilds the lookup table so that exactly the characters listed in
+// mPlainTextChars are marked as not requiring escaping.  Allocates the
+// table on first use.
+void Uri::updateEncodingTable() {
+   if(encodingTable == NULL)
+      encodingTable = new std::bitset<URI_ENCODING_TABLE_SIZE>;
+   // Install the defaults only when no set has been chosen.  Doing this
+   // unconditionally on the first call would discard (and leak) the set
+   // just stored by setNonEncodedChars()/addNonEncodedChars().
+   if(mPlainTextChars == NULL)
+      mPlainTextChars = new Data(URI_ENCODING_REGULAR_CHARS);
+   encodingTable->reset();
+   // Cast through unsigned char so bytes >= 0x80 index the table correctly
+   for(unsigned int i = 0; i < mPlainTextChars->size(); i++)
+      encodingTable->set((unsigned char)((*mPlainTextChars)[i]));
+}
+
+
static const Data parseContext("Uri constructor");
Uri::Uri(const Data& data)
   : ParserCategory(),
@@ -830,9 +872,19 @@
   return new Uri(*this);
}

-inline bool //.dcm. replace with lookup array
-shoudEscapeUserChar(char c)
+//inline bool //.dcm. replace with lookup array
+bool Uri::shouldEscapeUserChar(unsigned char c)
{
+
+   // Must be first invocation - set up sensible default values
+   if(Uri::encodingTable == NULL)
+      Uri::updateEncodingTable();
+   if(c >= Uri::encodingTable->size())
+      return true;
+
+   return (!Uri::encodingTable->test(c));
+
+/*
   if ( (c >= 'a' && c <= 'z') ||
        (c >= 'A' && c <= 'Z') ||
        (c >= '0' && c <= '9'))
@@ -863,7 +915,7 @@
         return false;
      default:
         return true;
-   }
+   } */
}

inline bool //.dcm. replace with lookup array
@@ -895,7 +947,7 @@
         return false;
      default:
         return true;
-   }
+   }
}

// should not encode user parameters unless its a tel?
@@ -906,7 +958,7 @@
   if (!mUser.empty())
   {
#ifdef HANDLE_CHARACTER_ESCAPING
-      mUser.escapeToStream(str, shoudEscapeUserChar);
+      mUser.escapeToStream(str, shouldEscapeUserChar);
#else
      str << mUser;
#endif