Friday, June 6, 2008

Decode HTML Strings in SQL Server

SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO

-- EXEC UrlDecode '%c3%a1'
-- EXEC UrlDecode '\\Tul1ciprmw1\RichMediaAssets\Open\Telemundo\Al Rojo Vivo con Mar%c3%ada Celeste\_derived_jpg_q90_172x128_m1\3ARValx05Mar%c3%ada Celeste Arrar%c3%a1s.jpg'
-- EXEC UrlDecode '%4D'

ALTER PROCEDURE UrlDecode
-- Add the parameters for the stored procedure here
@URL varchar(2000)
AS
BEGIN
-- SET NOCOUNT ON added to prevent extra result sets from
-- interfering with SELECT statements.
SET NOCOUNT ON;

DECLARE @Position INT,
@Base CHAR(16),
@High TINYINT,
@Low TINYINT,
@NewHigh INT,
@Pattern CHAR(21)

SELECT @Base = '0123456789abcdef',
@Pattern = '%[%][0-9a-f][0-9a-f]%',
@Position = PATINDEX(@Pattern, @URL)

SELECT @High = CHARINDEX(SUBSTRING(@URL, @Position + 1, 1), @Base) -1,
@Low = CHARINDEX(SUBSTRING(@URL, @Position + 2, 1), @Base) -1

WHILE @Position > 0
BEGIN

IF (@High & 15) = 12 -- xC0
BEGIN
SELECT @NewHigh = @Low * POWER(2, 6) --Shift Low 6 bits

SELECT @High = CHARINDEX(SUBSTRING(@URL, @Position + 4, 1), @Base) -1
SELECT @Low = CHARINDEX(SUBSTRING(@URL, @Position + 5, 1), @Base) -1

SELECT @URL = STUFF(@URL, @Position, 6, CHAR(@NewHigh | (16 * @High) | @Low))

SELECT @High = 0, @Low = 0, @NewHigh = 0

END
ELSE
BEGIN
SELECT @URL = STUFF(@URL, @Position, 3, CHAR(16 * @High | @Low))

END

SELECT @Position = PATINDEX(@Pattern, @URL)
IF @Position > 0
BEGIN
SELECT @High = CHARINDEX(SUBSTRING(@URL, @Position + 1, 1), @Base) -1,
@Low = CHARINDEX(SUBSTRING(@URL, @Position + 2, 1), @Base) -1
END
END

SELECT @URL

END
GO

2 comments:

Sebos said...

nice try ^^ i get an error on trying : UrlDecode 'vacances dété?'
it says (something like) :
Error arithmetic overflow for tinyint, value = -1

Gordon said...

I think what you're looking for is URL Encode. This routine translates the hexidecimal into the Unicode chars.

If you change the TINYINTs to INTS it will work, but it won't encode it.