FREE Subscription to Dr. Dobb’s Digest: Same Great Content, New Digital Edition
Site Archive (Complete)
Dobbs M-Dev
Email
Print
Reprint

add to:
Del.icio.us
Digg
Google
Furl
Slashdot
Y! MyWeb
Blink
October 01, 2003

URL Canonicalization Testing

(Page 10 of 16)
URL Canonicalization Testing

Listing 8: CStringEncoder body


#include "stdafx.h"
#include "StringEncoder.h"

#include <algorithm>

#include "CharacterEncoder.h"

// Builds the collection of encodings for stringToEncode by running the three
//    generators below, in this order: single-character encodings, whole-string
//    full encodings, then whole-string random encodings.  Each generator is
//    handed the same string, the same maximum level and the same encoder.
// The call order determines the order in which encodings are stored (and
//    therefore what Item() returns for a given index).
CStringEncoder::CStringEncoder(std::wstring stringToEncode, unsigned long level
    , IEncoder & encoder)
    {
    CreateSingleCharacterEncodings(stringToEncode, level, encoder);
    CreateEntireStringFullEncodings(stringToEncode, level, encoder);
    CreateEntireStringRandomEncodings(stringToEncode, level, encoder);
    }
// Destructor.  The body is intentionally empty: nothing is released
//    explicitly here.
CStringEncoder::~CStringEncoder()
    {
    }

// Reports how many encodings are currently stored.
unsigned long CStringEncoder::Count() const
    {
    // The container reports its size in its own size type; convert it
    //    explicitly to the type this interface exposes.
    const unsigned long storedEncodings
        = static_cast<unsigned long>(encodings.size());
    return storedEncodings;
    }
// Returns a copy of the stored encoding at the given position.
//
// index is 1-based: one is subtracted before the container is accessed, so
//    valid values are 1 through Count() inclusive.
//
// An out-of-range index throws std::out_of_range instead of reading outside
//    the container.  This includes 0, which wraps around to a very large
//    value when one is subtracted from it.
// NOTE(review): this relies on encodings being a standard sequence container
//    that provides at() (e.g. std::vector<std::wstring>) -- confirm against
//    the class declaration.
std::wstring CStringEncoder::Item(unsigned long index) const
    {
    return encodings.at(index - 1);
    }

// Stores encodingToAdd unless an equal encoding has already been stored, so
//    the collection never holds duplicates.
void CStringEncoder::AddEncoding(std::wstring encodingToAdd)
    {
    const bool alreadyStored = !DontAlreadyHaveEncoding(encodingToAdd);
    if (alreadyStored)
        {
        return;
        }

    encodings.push_back(encodingToAdd);
    }
// Returns true when no stored encoding compares equal to encodingToAdd, and
//    false when one does.  The stored encodings are searched linearly.
bool CStringEncoder::DontAlreadyHaveEncoding(std::wstring encodingToAdd) const
    {
    const bool foundMatch
        = (std::find(encodings.begin(), encodings.end(), encodingToAdd)
            != encodings.end());
    return !foundMatch;
    }

void CStringEncoder::CreateEntireStringFullEncodings(
    std::wstring stringToEncode, unsigned long level, IEncoder & encoder)
    {
    // We shouldn't ever get an empty string, but if we do we just won't
    //    encode it.
    if (0 == stringToEncode.length())
        {
        AddEncoding(stringToEncode);
        return;
        }

    for (unsigned long currentLevel = 0; currentLevel <= level; ++currentLevel)
        {
        std::wstring encoding(L"");
        
        // Replace every character in the string with its full encoding.
        for (std::wstring::size_type currentCharacter = 0; 
            currentCharacter < stringToEncode.length(); ++currentCharacter)
            {
            CCharacterEncoder characterEncoder(stringToEncode[currentCharacter]
                , currentLevel, encoder);
            encoding += characterEncoder.FullEncoding(currentLevel);
            }

        AddEncoding(encoding);
        }
    }

void CStringEncoder::CreateEntireStringRandomEncodings(
    std::wstring stringToEncode, unsigned long level, IEncoder & encoder)
    {
    // We shouldn't ever get an empty string, but if we do we just won't
    //    encode it.
    if (0 == stringToEncode.length())
        {
        AddEncoding(stringToEncode);
        return;
        }

    for (unsigned long currentLevel = 0; currentLevel <= level; ++currentLevel)
        {
        std::wstring encoding(L"");
        
        // Replace every character in the string with either its
        //    full or partial encoding.
        for (std::wstring::size_type currentCharacter = 0; 
            currentCharacter < stringToEncode.length(); ++currentCharacter)
            {
            CCharacterEncoder characterEncoder(stringToEncode[currentCharacter]
                , currentLevel, encoder);
            // Decide which character encoding level to use.  If we're on 
            //   level 0 we can skip this step.
            unsigned long levelToUse(0);
            if (0 < currentLevel)
                {
                levelToUse = SelectRandomIndex(0, currentLevel);
                }
            if (UseFullEncoding())
                {
                encoding += characterEncoder.FullEncoding(levelToUse);
                }
            else
                {
                encoding += characterEncoder.PartialEncoding(levelToUse);
                }
            }

        AddEncoding(encoding);
        }
    }
void CStringEncoder::CreateSingleCharacterEncodings(std::wstring stringToEncode
    , unsigned long level, IEncoder & encoder)
    {
    // We shouldn't ever get a null string, but if we do we just won't
    //    encode it.
    if (0 == stringToEncode.length())
        {
        AddEncoding(stringToEncode);
        return;
        }

    // Decide which character to replace.  If the string is only one
    //    character long we can skip this step.
    unsigned long indexToReplace(0);
    if (1 < stringToEncode.length())
        {
        indexToReplace = SelectRandomIndex(0, stringToEncode.length() - 1);
        }
    CCharacterEncoder characterEncoder(stringToEncode[indexToReplace], level
        , encoder);

    // Add a version of the base string with the chosen character replaced
    //    with its partial and full encoding at each level.
    for (unsigned long currentLevel = 0; currentLevel <= level; ++currentLevel)
        {
        std::wstring partialEncoding(stringToEncode);
        partialEncoding.replace(indexToReplace, 1
            , characterEncoder.PartialEncoding(currentLevel));
        AddEncoding(partialEncoding);

        std::wstring fullEncoding(stringToEncode);
        fullEncoding.replace(indexToReplace, 1
            , characterEncoder.FullEncoding(currentLevel));
        AddEncoding(fullEncoding);
        }
    }

Previous Page | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 Next Page
TOP 5 ARTICLES
No Top Articles.



MICROSITES
FEATURED TOPIC

ADDITIONAL TOPICS

INFO-LINK