FREE Subscription to Dr. Dobb’s Digest: Same Great Content, New Digital Edition
Site Archive (Complete)
Dobbs M-Dev
Email
Print
Reprint

add to:
Del.icio.us
Digg
Google
Furl
Slashdot
Y! MyWeb
Blink
October 01, 2003

URL Canonicalization Testing

(Page 3 of 16)
URL Canonicalization Testing

Listing 10: EncodeUrlUsingEncoder


// Passes a series of variants of the source URL to AddUrl(), using the
// supplied encoder to produce the encoded forms.  How many variants are
// produced depends on the member encodingLevel:
//
//    any level   the source URL exactly as stored
//    1 or more   the source URL with its "unsafe" characters escaped
//    2 or more   authentication-data prefixes, IP address forms, encoded
//                domains, encoded path separators, encoded paths, and
//                injected path navigation
//
// encoder is handed by reference to every C*Encoder helper built below.
void CUrlCanonicalizationTestCases::EncodeUrlUsingEncoder(IEncoder & encoder)
    {
    // Baseline: the (fully canonicalized) source URL, regardless of level.
    AddUrl(sourceUrl->Protocol(), sourceUrl->Domain(), sourceUrl->Path());

    // Level 1 and up: the same URL with "unsafe" characters (as defined by
    //    UrlEscape) escaped in both the domain and the path.
    if (encodingLevel >= 1)
        {
        AddUrl(sourceUrl->Protocol(),
            EscapeUnsafeCharacters(sourceUrl->Domain()),
            EscapeUnsafeCharacters(sourceUrl->Path()));
        }

    // Everything that follows applies to level 2 and up only.
    if (encodingLevel < 2)
        {
        return;
        }

    // The encoders have no notion of our level 1 ("escape unsafe
    //    characters"); they strictly encode whatever they are given.  Their
    //    level is therefore one lower than ours.
    long encoderLevel = encodingLevel - 1;

    // ---- Authentication data in front of the domain, '@' encoded ----
    // Text placed ahead of the '@'; the order here fixes the order in which
    //    the URLs are added.
    static const wchar_t * const authorityPrefixes[] =
        {
        L"username:password",
        L"blahblah",
        L"www.evil.com/You/Are/So/Hacked.htm"
        };
    const unsigned long authorityPrefixCount =
        sizeof(authorityPrefixes) / sizeof(authorityPrefixes[0]);

    CCharacterEncoder atSignEncoder(L'@', encoderLevel, encoder);
    // level is unsigned while encoderLevel is signed; encoderLevel is at
    //    least 1 here, so the mixed comparison behaves as expected.
    for (unsigned long level = 0; level <= encoderLevel; ++level)
        {
        // First every prefix with the partial encoding of '@' ...
        for (unsigned long partialPrefix = 0;
            partialPrefix < authorityPrefixCount; ++partialPrefix)
            {
            AddUrl(sourceUrl->Protocol(),
                authorityPrefixes[partialPrefix]
                    + atSignEncoder.PartialEncoding(level)
                    + sourceUrl->Domain(),
                sourceUrl->Path());
            }

        // ... then every prefix with the full encoding of '@'.
        for (unsigned long fullPrefix = 0;
            fullPrefix < authorityPrefixCount; ++fullPrefix)
            {
            AddUrl(sourceUrl->Protocol(),
                authorityPrefixes[fullPrefix]
                    + atSignEncoder.FullEncoding(level)
                    + sourceUrl->Domain(),
                sourceUrl->Path());
            }
        }

    // ---- The domain expressed as IP address variants ----
    // Items of every encoder below are read with 1-based indexes, 1 through
    //    Count() inclusive.
    CIPEncoder addressEncoder(sourceUrl->Domain(),
        maxCharactersPrependedToIPAddresses, encoderLevel, encoder);
    for (unsigned long addressItem = 1;
        addressItem <= addressEncoder.Count(); ++addressItem)
        {
        AddUrl(sourceUrl->Protocol(), addressEncoder.Item(addressItem),
            sourceUrl->Path());
        }

    // ---- Encodings of the domain name ----
    CStringEncoder hostEncoder(sourceUrl->Domain(), encoderLevel, encoder);
    for (unsigned long hostItem = 1; hostItem <= hostEncoder.Count();
        ++hostItem)
        {
        AddUrl(sourceUrl->Protocol(), hostEncoder.Item(hostItem),
            sourceUrl->Path());
        }

    // ---- Encodings of '.', '/' and '\' within the path ----
    CPathEncoder periodEncoder(L'.', sourceUrl->Path(), encoderLevel,
        encoder);
    for (unsigned long periodItem = 1; periodItem <= periodEncoder.Count();
        ++periodItem)
        {
        AddUrl(sourceUrl->Protocol(), sourceUrl->Domain(),
            periodEncoder.Item(periodItem));
        }

    CPathEncoder forwardSlashEncoder(L'/', sourceUrl->Path(), encoderLevel,
        encoder);
    for (unsigned long forwardSlashItem = 1;
        forwardSlashItem <= forwardSlashEncoder.Count(); ++forwardSlashItem)
        {
        AddUrl(sourceUrl->Protocol(), sourceUrl->Domain(),
            forwardSlashEncoder.Item(forwardSlashItem));
        }

    CPathEncoder reverseSlashEncoder(L'\\', sourceUrl->Path(), encoderLevel,
        encoder);
    for (unsigned long reverseSlashItem = 1;
        reverseSlashItem <= reverseSlashEncoder.Count(); ++reverseSlashItem)
        {
        AddUrl(sourceUrl->Protocol(), sourceUrl->Domain(),
            reverseSlashEncoder.Item(reverseSlashItem));
        }

    // ---- Partial and full encodings of the whole path ----
    CStringEncoder wholePathEncoder(sourceUrl->Path(), encoderLevel,
        encoder);
    for (unsigned long wholePathItem = 1;
        wholePathItem <= wholePathEncoder.Count(); ++wholePathItem)
        {
        AddUrl(sourceUrl->Protocol(), sourceUrl->Domain(),
            wholePathEncoder.Item(wholePathItem));
        }

    // ---- Path navigation injected into the path ----
    // Unlike the encoders above, the injector takes neither a level nor the
    //    encoder.
    CNavigationInjector pathInjector(sourceUrl->Path());
    for (unsigned long injectedItem = 1;
        injectedItem <= pathInjector.Count(); ++injectedItem)
        {
        AddUrl(sourceUrl->Protocol(), sourceUrl->Domain(),
            pathInjector.Item(injectedItem));
        }
    }

Previous Page | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 Next Page
TOP 5 ARTICLES
No Top Articles.



MICROSITES
FEATURED TOPIC

ADDITIONAL TOPICS

INFO-LINK