// ==========================================================================
// JavaScript Tool for URL Encoding/Decoding
// Copyright (C) 2006 Netzreport (netzreport.googlepages.com)
//
// Website: http://netzreport.googlepages.com/online_tool_for_url_en_decoding.html
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
//
// The GNU General Public License is also available from:
// http://www.gnu.org/copyleft/gpl.html
//
// A local copy of the GNU General Public License is available here:
// http://netzreport.googlepages.com/gpl.txt
// ==========================================================================
//
// --------------------------------------------------------------------------
// 2006-12-18: Changed character encoding. Now, one can choose between URL
//             encoding/decoding strings that are character encoded as ASCII
//             or UTF-8.
// 2006-11-19: First release
// --------------------------------------------------------------------------

// Character tables used by the encoder and decoder below.
// "unreserved" holds the characters urlencode() copies through untouched
// (RFC 3986 unreserved set: digits, letters, "-", "_", "." and "~").
// "reserved" holds the RFC 3986 delimiter characters plus the percent sign.
// "allowed" is simply both tables joined together.
// "hexchars" lists the hexadecimal digits; positions 0-15 are the upper-case
// digits gethex() uses to build "%XX" escapes.
var unreserved = "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "-_.~";
var reserved = "!*'();:@&=+$,/?%#[]";
var allowed = unreserved.concat(reserved);
var hexchars = "0123456789" + "ABCDEF" + "abcdef";

// --------------------------------- Encoding -------------------------------

// Builds the URL-encoded form of a single byte value (0 - 255).
// The space character (32) is the one exception: it is written as "+"
// instead of "%20". Every other value becomes a percent sign followed by
// two upper-case hexadecimal digits looked up in hexchars.
function gethex(decimal) {
	// Loose comparison on purpose: it matches the value 32 however it is typed.
	if (decimal == 32) {
		return "+";
	}
	var highNibble = hexchars.charAt(decimal >> 4);
	var lowNibble = hexchars.charAt(decimal & 0xF);
	return "%" + highNibble + lowNibble;
}

// URL-encodes a string using UTF-8 as the character encoding.
//
// Characters from the "unreserved" table are copied through unchanged. Every
// other character is converted to its UTF-8 byte sequence and each byte is
// written with gethex() (so a space becomes "+", everything else "%XX").
//
// Parameter: decoded - the plain string to encode.
// Returns:   the URL-encoded string.
//
// JavaScript strings are sequences of UTF-16 code units, so a character above
// U+FFFF arrives as a surrogate pair. The pair is combined into one code point
// before encoding; otherwise the four-byte branch could never be reached and
// each half would be emitted as its own (invalid) three-byte sequence.
// A surrogate without a matching partner is still encoded on its own as three
// bytes, exactly as before, so that no input is ever dropped.
function urlencode(decoded) {
	var encoded = "";
	for (var i = 0; i < decoded.length; i++) {
		var ch = decoded.charAt(i);

		// Unreserved characters need no encoding at all.
		if (unreserved.indexOf(ch) !== -1) {
			encoded += ch;
			continue;
		}

		// The position in the Unicode table tells us how many bytes are needed:
		//
		//   Position in   |  Bytes needed   | Binary representation
		//  Unicode table  |   for UTF-8     |       of UTF-8
		// ----------------------------------------------------------
		//     0 -     127 |    1 byte       | 0XXX.XXXX
		//   128 -    2047 |    2 bytes      | 110X.XXXX 10XX.XXXX
		//  2048 -   65535 |    3 bytes      | 1110.XXXX 10XX.XXXX 10XX.XXXX
		// 65536 - 1114111 |    4 bytes      | 1111.0XXX 10XX.XXXX 10XX.XXXX 10XX.XXXX
		var charcode = decoded.charCodeAt(i);

		// High surrogate followed by a low surrogate: merge both code units into
		// the real code point and skip the second unit.
		if (charcode >= 0xD800 && charcode <= 0xDBFF && i + 1 < decoded.length) {
			var low = decoded.charCodeAt(i + 1);
			if (low >= 0xDC00 && low <= 0xDFFF) {
				charcode = 0x10000 + ((charcode - 0xD800) << 10) + (low - 0xDC00);
				i++;
			}
		}

		if (charcode < 128) {
			// One byte: identical to percent-encoding of plain ASCII.
			encoded += gethex(charcode);
		} else if (charcode < 2048) {
			// Two bytes: 110X.XXXX 10XX.XXXX
			encoded += gethex((charcode >> 6) | 0xC0);
			encoded += gethex((charcode & 0x3F) | 0x80);
		} else if (charcode < 65536) {
			// Three bytes: 1110.XXXX 10XX.XXXX 10XX.XXXX
			encoded += gethex((charcode >> 12) | 0xE0);
			encoded += gethex(((charcode >> 6) & 0x3F) | 0x80);
			encoded += gethex((charcode & 0x3F) | 0x80);
		} else {
			// Four bytes: 1111.0XXX 10XX.XXXX 10XX.XXXX 10XX.XXXX
			encoded += gethex((charcode >> 18) | 0xF0);
			encoded += gethex(((charcode >> 12) & 0x3F) | 0x80);
			encoded += gethex(((charcode >> 6) & 0x3F) | 0x80);
			encoded += gethex((charcode & 0x3F) | 0x80);
		}
	}

	return encoded;
}


// ------------------------------------------- Decoding --------------------------

// Converts a "%xx" escape into the integer value of its two hexadecimal
// digits. The first character is skipped without being inspected, and the
// following (up to) two characters are handed to parseInt with radix 16.
function decodebyte(wot) {
	var digits = wot.substring(1, 3);
	return parseInt(digits, 16);
}

// Decodes a URL-encoded string whose escapes represent UTF-8 bytes.
//
// Parameter: wot - the URL-encoded string.
// Returns:   the decoded string.
//
// Rules applied while scanning from left to right:
//   * "+" becomes a space; any character other than "%" is copied as is.
//   * A "%" that is not followed by two hexadecimal digits is copied
//     literally (previously it produced a NUL character).
//   * "%XX" escapes are read as UTF-8: one lead byte plus up to three
//     continuation bytes (10XX.XXXX). Code points above U+FFFF are emitted as
//     a UTF-16 surrogate pair, because String.fromCharCode only keeps the low
//     16 bits of its argument.
//   * A lead byte that is invalid, or whose continuation bytes are missing or
//     malformed, yields "?" and only that one escape is consumed, so the text
//     that follows is not swallowed.
function urldecode(wot) {
	// Value of the "%XX" escape starting at pos, or -1 if there is no
	// well-formed escape at that position.
	function byteAt(pos) {
		var triplet = wot.substr(pos, 3);
		return /^%[0-9A-Fa-f]{2}$/.test(triplet) ? decodebyte(triplet) : -1;
	}

	// Same as byteAt, but additionally requires a UTF-8 continuation byte.
	function continuationAt(pos) {
		var b = byteAt(pos);
		return (b & 0xC0) === 0x80 ? b : -1;
	}

	var retval = "";
	var i = 0, c, u, b1, b2, b3, b4;
	while (i < wot.length) {
		c = wot.charAt(i);
		if (c === "+") {
			retval += " ";
			i++;
			continue;
		}
		if (c !== "%") {
			retval += c;
			i++;
			continue;
		}

		b1 = byteAt(i);
		if (b1 < 0) {
			// Not a real escape (e.g. "100%" or "%zz"): keep the percent sign.
			retval += "%";
			i++;
			continue;
		}
		if ((b1 & 0x80) === 0) { // 0XXX XXXX - plain ASCII
			retval += String.fromCharCode(b1);
			i += 3;
			continue;
		}
		if ((b1 & 0xE0) === 0xC0) { // 110X XXXX - 2-byte encoding
			b2 = continuationAt(i + 3);
			if (b2 >= 0) {
				u = ((b1 & 0x1F) << 6) | (b2 & 0x3F);
				retval += String.fromCharCode(u);
				i += 6;
				continue;
			}
		} else if ((b1 & 0xF0) === 0xE0) { // 1110 XXXX - 3-byte encoding
			b2 = continuationAt(i + 3);
			b3 = continuationAt(i + 6);
			if (b2 >= 0 && b3 >= 0) {
				u = ((b1 & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
				retval += String.fromCharCode(u);
				i += 9;
				continue;
			}
		} else if ((b1 & 0xF8) === 0xF0) { // 1111 0XXX - 4-byte encoding
			b2 = continuationAt(i + 3);
			b3 = continuationAt(i + 6);
			b4 = continuationAt(i + 9);
			if (b2 >= 0 && b3 >= 0 && b4 >= 0) {
				u = ((b1 & 0x07) << 18) | ((b2 & 0x3F) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F);
				if (u > 0x10FFFF) {
					// Beyond the Unicode range: nothing sensible to emit.
					retval += "?";
				} else if (u >= 0x10000) {
					// Split into high and low surrogate for UTF-16.
					u -= 0x10000;
					retval += String.fromCharCode(0xD800 | (u >> 10), 0xDC00 | (u & 0x3FF));
				} else {
					retval += String.fromCharCode(u);
				}
				i += 12;
				continue;
			}
		}

		// Invalid lead byte or broken multi-byte sequence.
		retval += "?";
		i += 3;
	}
	return retval;
}

