October 01, 2003
URL Canonicalization Testing
URL Canonicalization Testing
Listing 8: CStringEncoder body
#include "stdafx.h"
#include "StringEncoder.h"
#include <algorithm>
#include "CharacterEncoder.h"
// Builds the collection of encodings for stringToEncode, covering every
// level from 0 through level, using the supplied encoder.
//
// Three families of encodings are generated, in this order:
//   1. the string with a single character replaced by its encoding,
//   2. the string with every character replaced by its full encoding,
//   3. the string with every character replaced by a full or partial
//      encoding.
// Every generator stores its results through AddEncoding, so an encoding
// that is already in the collection is not stored a second time.
CStringEncoder::CStringEncoder(std::wstring stringToEncode, unsigned long level
    , IEncoder & encoder)
{
    this->CreateSingleCharacterEncodings(stringToEncode, level, encoder);
    this->CreateEntireStringFullEncodings(stringToEncode, level, encoder);
    this->CreateEntireStringRandomEncodings(stringToEncode, level, encoder);
}
// Destructor. The body is intentionally empty: no explicit cleanup is
// performed here.
CStringEncoder::~CStringEncoder()
{
}
// Returns how many encodings are currently stored in the collection.
unsigned long CStringEncoder::Count() const
{
    // size() reports the container's own size type; spell out the
    // conversion to the declared return type instead of leaving it implicit.
    return static_cast<unsigned long>(encodings.size());
}
// Returns a copy of the encoding stored at the given position.
//
// index is 1-based: valid values are 1 through Count() inclusive.
//
// The lookup uses the container's bounds-checked accessor, so an invalid
// index throws std::out_of_range instead of reading outside the collection.
// This also covers index 0: subtracting 1 from an unsigned 0 wraps around
// to a very large value, which the checked accessor rejects.
// NOTE(review): assumes encodings is a standard sequence container that
// provides at() (e.g. std::vector<std::wstring>) - confirm in the header.
std::wstring CStringEncoder::Item(unsigned long index) const
{
    return encodings.at(index - 1);
}
// Appends encodingToAdd to the collection unless an identical encoding is
// already stored, so the collection never holds duplicates.
void CStringEncoder::AddEncoding(std::wstring encodingToAdd)
{
    if (!DontAlreadyHaveEncoding(encodingToAdd))
    {
        // Already stored; nothing to do.
        return;
    }

    encodings.push_back(encodingToAdd);
}
// Returns true when encodingToAdd is NOT yet in the collection, false when
// an equal encoding is already stored. Uses a linear search.
bool CStringEncoder::DontAlreadyHaveEncoding(std::wstring encodingToAdd) const
{
    const bool alreadyStored =
        std::find(encodings.begin(), encodings.end(), encodingToAdd)
            != encodings.end();

    return !alreadyStored;
}
void CStringEncoder::CreateEntireStringFullEncodings(
std::wstring stringToEncode, unsigned long level, IEncoder & encoder)
{
// We shouldn't ever get an empty string, but if we do we just won't
// encode it.
if (0 == stringToEncode.length())
{
AddEncoding(stringToEncode);
return;
}
for (unsigned long currentLevel = 0; currentLevel <= level; ++currentLevel)
{
std::wstring encoding(L"");
// Replace every character in the string with its full encoding.
for (std::wstring::size_type currentCharacter = 0;
currentCharacter < stringToEncode.length(); ++currentCharacter)
{
CCharacterEncoder characterEncoder(stringToEncode[currentCharacter]
, currentLevel, encoder);
encoding += characterEncoder.FullEncoding(currentLevel);
}
AddEncoding(encoding);
}
}
void CStringEncoder::CreateEntireStringRandomEncodings(
std::wstring stringToEncode, unsigned long level, IEncoder & encoder)
{
// We shouldn't ever get an empty string, but if we do we just won't
// encode it.
if (0 == stringToEncode.length())
{
AddEncoding(stringToEncode);
return;
}
for (unsigned long currentLevel = 0; currentLevel <= level; ++currentLevel)
{
std::wstring encoding(L"");
// Replace every character in the string with either its
// full or partial encoding.
for (std::wstring::size_type currentCharacter = 0;
currentCharacter < stringToEncode.length(); ++currentCharacter)
{
CCharacterEncoder characterEncoder(stringToEncode[currentCharacter]
, currentLevel, encoder);
// Decide which character encoding level to use. If we're on
// level 0 we can skip this step.
unsigned long levelToUse(0);
if (0 < currentLevel)
{
levelToUse = SelectRandomIndex(0, currentLevel);
}
if (UseFullEncoding())
{
encoding += characterEncoder.FullEncoding(levelToUse);
}
else
{
encoding += characterEncoder.PartialEncoding(levelToUse);
}
}
AddEncoding(encoding);
}
}
void CStringEncoder::CreateSingleCharacterEncodings(std::wstring stringToEncode
, unsigned long level, IEncoder & encoder)
{
// We shouldn't ever get a null string, but if we do we just won't
// encode it.
if (0 == stringToEncode.length())
{
AddEncoding(stringToEncode);
return;
}
// Decide which character to replace. If the string is only one
// character long we can skip this step.
unsigned long indexToReplace(0);
if (1 < stringToEncode.length())
{
indexToReplace = SelectRandomIndex(0, stringToEncode.length() - 1);
}
CCharacterEncoder characterEncoder(stringToEncode[indexToReplace], level
, encoder);
// Add a version of the base string with the chosen character replaced
// with its partial and full encoding at each level.
for (unsigned long currentLevel = 0; currentLevel <= level; ++currentLevel)
{
std::wstring partialEncoding(stringToEncode);
partialEncoding.replace(indexToReplace, 1
, characterEncoder.PartialEncoding(currentLevel));
AddEncoding(partialEncoding);
std::wstring fullEncoding(stringToEncode);
fullEncoding.replace(indexToReplace, 1
, characterEncoder.FullEncoding(currentLevel));
AddEncoding(fullEncoding);
}
}
Previous Page | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | Next Page