//=========================================================================================================
//
// Copyright 2002, 2003, 2004 Macromedia, Inc. All rights reserved.
//
// Feature: Paste Fix
// Author:  JDH
// Module:  PMScanners.js
// Purpose: HTML Scanning classes.
// Updates:
//	5/31/02 - Started file control
//
//=========================================================================================================


// This source file contains a number of generic helper classes that parse or alter a stream of HTML using
// the scanSourceString functionality in the Dreamweaver class.  The pattern for the use of all of these 
// classes is:
//
// var scanner = new <scanner_name>( <args> );
// var retVal = scanner.scan( html );
//
// Where the retVal is either the altered HTML (for a scanner that alters HTML), or an array (or associative
// array) for scanners that parse the HTML looking for specific tags.
//
// These classes are essentially unrolled, by which I mean that, while there are more elegant multi-level
// hierachal solutions to the problems solved by these scanners, I took a more lightweight approach and made
// more scanners that had lower levels of functionality, and did not descend from a large hierarchy.  The
// reason was mainly effeciency and simplicity.  Only the StructureScanner class is meant to be derived from
// during use.

// Some thoughts on scanSourceString:
//
// 1) Only implement the methods in the class that you need to get your job done.  Don't implement closeTagBegin
// if you don't need it.  scanSourceString looks for the existence of these methods at the beginning of the parse
// and will short-curcuit the call if you haven't defined a method, and that will save you some time.  It also
// makes the intention of your code more clear.
//
// 2) Don't return anything from the scanSourceString methods.  scanSourceString isn't looking for a return value
// so don't give it one.
//
// 3) I've seen a pattern in other code where you create an object with new Object and then add methods in 
// on slots using new Function.  I think the code is hard to read and hard to maintain, without performance benefit.
// I would avoid this pattern and I have not used it here.


function handleAttributeText( str )
{
	str = str.replace( /\"/g, "" );
	return str;
}


//---------------------------------------------------------------------------------------------------------
//  GetContentScanner
//---------------------------------------------------------------------------------------------------------

// The GetContentScanner class gets the contents of any tags in the array of tags
// specified in the input array. For example, specifying:  [ "p" ] and then scanning
// this:
//
// <html><body><p>This</p><p>is</p><p>a</p><p>test</p></body></html>
//
// Would return an array like, [ "This", "is", "a", "test" ].
//
// Please note that this code was specifically designed to ignore directives and to get the
// contents within directives.
//
// This scanner does not alter the HTML in any way.

function GetContentScanner( tagNameArray )
{
	this._tagNameArray = tagNameArray;
	this._findLookup = {};
	for( var index in tagNameArray )
		this._findLookup[ tagNameArray[ index ] ] = 1;
}

// External methods

GetContentScanner.prototype.scan = GetContentScanner_scan;

// scanSourceString specific methods

GetContentScanner.prototype.directive = GetContentScanner_directive;
GetContentScanner.prototype.text = GetContentScanner_text;
GetContentScanner.prototype.openTagBegin = GetContentScanner_openTagBegin;
GetContentScanner.prototype.closeTagBegin = GetContentScanner_closeTagBegin;

function GetContentScanner_scan( source )
{
	this._found = [];
	this._inTag = false;
	this._directives = [];
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "GetContentScanner bad scan" );

	for( var dir_index in this._directives )
		dw.scanSourceString( this._directives[ dir_index ], this );

	return this._found;
}

function GetContentScanner_directive( code, offset )
{
	try {

		code = code.replace( /^\<([^>]*)>/, "" );
		code = code.replace( /\<([^>]*)>$/, "" );

		this._directives.push( code );

	} catch(e) {

		this._scan_error = false;
		return false;

	}

	return true;
}

function GetContentScanner_text( code, offset )
{
	try {

		if ( this._inTag )
			this._found.push( code );

	} catch(e) {

		this._scan_error = false;
		return false;

	}

	return true;
}

function GetContentScanner_openTagBegin( tag, offset )
{
	try {

		if ( this._findLookup[ tag.toLowerCase() ] )
			this._inTag = true;

	} catch(e) {

		this._scan_error = false;
		return false;

	}

	return true;
}

function GetContentScanner_closeTagBegin( tag, offset )
{
	try {

		if ( this._findLookup[ tag.toLowerCase() ] )
			this._inTag = false;

	} catch(e) {

		this._scan_error = false;
		return false;

	}

	return true;
}




//---------------------------------------------------------------------------------------------------------
//  FindDirectiveScanner
//---------------------------------------------------------------------------------------------------------

// The FindDirectiveScanner class gets the contents of any tags in the array of tags
// specified in the input array. For example, specifying:  [ "p" ] and then scanning
// this:
//
// <html><body><p>This</p><p>is</p><p>a</p><p>test</p></body></html>
//
// Would return an array like, [ "This", "is", "a", "test" ].
//
// Please note that this code was specifically designed to ignore directives and to get the
// contents within directives.
//
// This scanner does not alter the HTML in any way.

function FindDirectiveScanner( directive )
{
	this._directive = directive;
}

// External methods

FindDirectiveScanner.prototype.scan = FindDirectiveScanner_scan;

// scanSourceString specific methods

FindDirectiveScanner.prototype.directive = FindDirectiveScanner_directive;

function FindDirectiveScanner_scan( source )
{
	this._found = false;
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "FindDirectiveScanner bad scan" );

	return this._found;
}

function FindDirectiveScanner_directive( code, offset )
{
	try {

		if ( code == this._directive )
			this._found = true;

		return true;

	} catch(e) {

		this._scan_error = false;
		return false;

	}

	return true;
}


//---------------------------------------------------------------------------------------------------------
//  GetMetaTagsScanner
//---------------------------------------------------------------------------------------------------------

// Scans the source string for meta tags and returns an associative array
// of name value pairs.  For example, scanning this HTML:
//
// <html><head>
// <meta name="key1" content="value1">
// <meta name="key2" content="value2">
// </head></html>
//
// Will return { key1: "value1", key2: "value2" }
//
// This scanner does not alter the HTML in any way.

function GetMetaTagsScanner( ) { }

// External methods

GetMetaTagsScanner.prototype.scan = GetMetaTagsScanner_scan;

// scanSourceString specific methods

GetMetaTagsScanner.prototype.openTagBegin = GetMetaTagsScanner_openTagBegin;
GetMetaTagsScanner.prototype.closeTagBegin = GetMetaTagsScanner_closeTagBegin;
GetMetaTagsScanner.prototype.attribute = GetMetaTagsScanner_attribute;

function GetMetaTagsScanner_scan( source )
{
	this._found = {};
	this._inMeta = false;
	this._name = null;
	this._content = null;
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "GetMetaTagsScanner bad scan" );

	return this._found;
}

function GetMetaTagsScanner_openTagBegin( tag, offset )
{
	try {

		if ( tag.toLowerCase() == "meta" )
		{
			this._inMeta = true;
			this._name = null;
			this._content = null;
		}

	} catch( e ) {

		this._scan_error = true;
		return false;

	}
}

function GetMetaTagsScanner_closeTagBegin( tag, offset )
{
	try {

		if ( this._inMeta )
			this._inMeta = false;

	} catch( e ) {

		this._scan_error = true;
		return false;

	}
}

function GetMetaTagsScanner_attribute( name, code )
{
	try {

		if ( this._inMeta )
		{
			if ( name.toLowerCase() == "name" )
				this._name = code;
			if ( name.toLowerCase() == "content" )
				this._content = code;

			if ( this._name != null && this._content != null )
			{
				this._found[ this._name.toLowerCase() ] = this._content;
				this._name = null;
				this._content = null;
			}
		}

	} catch( e ) {

		this._scan_error = true;
		return false;

	}
}



//---------------------------------------------------------------------------------------------------------
//  ParseSupportListsScanner
//---------------------------------------------------------------------------------------------------------

// The ParseSupportListsScanner was specifically written with word in mind.  Word has some fairly
// interesting directive laced HTML that sits around hierarchal list items.  This does not apply to single
// level numeric or bulleted lists, which are implemented with <LI> tags.  This only applies to multi-level
// hierarchal lists, like:
//
// 1. First level
//     1.1  Sub level 1
//          1.1.1  Something reminiscent of the military
//     1.2  Sub level 2
//
// The Word format puts in a directive around supporting lists, this parser removes only the
// sections we want from it.  As an example:
// 
// <![if !supportLists]><span style='mso-list:Ignore'>I.<span
// style='font:7.0pt "Times New Roman"'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nb
// sp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
// </span></span><![endif]>
//
// Becomes, just:
//
// I.
//
// The encloding <P> tag has the indent level in the style attribute.

function ParseSupportListsScanner( ) { }

// Local methods

ParseSupportListsScanner.prototype.scan = ParseSupportListsScanner_scan;

// scanSourceString specific methods

ParseSupportListsScanner.prototype.directive = ParseSupportListsScanner_directive;
ParseSupportListsScanner.prototype.text = ParseSupportListsScanner_text;
ParseSupportListsScanner.prototype.openTagBegin = ParseSupportListsScanner_openTagBegin;
ParseSupportListsScanner.prototype.openTagEnd = ParseSupportListsScanner_openTagEnd;
ParseSupportListsScanner.prototype.closeTagBegin = ParseSupportListsScanner_closeTagBegin;
ParseSupportListsScanner.prototype.attribute = ParseSupportListsScanner_attribute;

function ParseSupportListsScanner_scan( source, context )
{
	this._sb = context.createStringBuffer();
	this._inSupportLists = false;
	this._scan_error = false;
	this._in_paragraph = false;
	this._in_list = false;
	this._p_attributes = {};
	this._item_level = 0;
	this._cur_item_level = 0;

//	var text = source.replace( /\n/g, "" );
//	text = text.replace( /\r/g, "" );
//	var found = text.match( /\<body(.*)/img );
//	alert( found );

	dw.scanSourceString( source, this );

//	text = this._sb.get();
//	text = text.replace( /\n/g, "" );
//	text = text.replace( /\r/g, "" );
//	found = text.match( /\<body(.*)/img );
//	alert( found );

	if ( this._scan_error )
		throw( "ParseSupportListsScanner bad scan" );

	return this._sb.get();
}

function ParseSupportListsScanner_directive( code, offset )
{
	try {

		var testCode = code.toLowerCase();
		if( code.match( /^\<\!\[if \!supportLists\]>/ ) )
		{
			this._inSupportLists = true;
		}
		else if ( this._inSupportLists )
		{
			if ( code.match( /^\<\!\[endif\]\>/ ) )
				this._inSupportLists = false;
		}
		else
		{
			this._sb.append( code );
		}

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function ParseSupportListsScanner_text( code, offset )
{
	try {

		if ( ! this._inSupportLists )
		{
			if ( code.length > 0 )
				this._sb.append( code );
		}

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function ParseSupportListsScanner_openTagBegin( tag, offset )
{
	try {

		if ( tag.toString().toLowerCase() == "p" )
		{
			this._in_paragraph = true;
			this._p_attributes = {};
			this._in_list = false;
		}
		else if ( ! this._inSupportLists )
		{
			if ( ! this._in_list && ! this._in_paragraph )
			{
				if ( this._cur_item_level )
				{
					for( var rep = 0; rep < this._cur_item_level; rep++ )
						this._sb.append( "</ul>" );
					this._cur_item_level = 0;
				}
			}

			this._sb.append( "<" + tag );
		}

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function ParseSupportListsScanner_openTagEnd( tag, trailingFormat )
{
	try {

		if ( this._in_paragraph )
		{
			if ( this._in_list )
			{
				if ( this._item_level > this._cur_item_level )
				{
					var delta = this._item_level - this._cur_item_level;
					for( var rep = 0; rep < delta; rep++ )
						this._sb.append( "<ul>" );
				}

				if ( this._item_level < this._cur_item_level )
				{
					var delta = this._cur_item_level - this._item_level;
					for( var rep = 0; rep < delta; rep++ )
						this._sb.append( "</ul>" );
				}

				this._cur_item_level = this._item_level;
				this._sb.append( "<li>" );
			}
			else
			{
				if ( this._cur_item_level )
				{
					for( var rep = 0; rep < this._cur_item_level; rep++ )
						this._sb.append( "</ul>" );
					this._cur_item_level = 0;
				}
			
				var code = "<p";
				for( var key in this._p_attributes )
				{
					if ( this._p_attributes[ key ] != null )
						code += " " + key + "=\"" + handleAttributeText(this._p_attributes[ key ]) + "\"";
				}
				code += ">";
				this._sb.append( code );
			}
			this._in_paragraph = false;
		}
		else if ( ! this._inSupportLists )
		{
			this._sb.append( ">" );

			if ( this._cur_item_level && ! this._in_list )
			{
				for( var rep = 0; rep < this._cur_item_level; rep++ )
					this._sb.append( "</ul>" );
				this._cur_item_level = 0;
			}
		}

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function ParseSupportListsScanner_closeTagBegin( tag, offset )
{
	try {

		if ( tag.toLowerCase() == "p" && this._in_list )
		{
			this._sb.append( "</li>" );
			this._in_list = false;
		}
		else if ( ! this._inSupportLists )
		{
			if ( this._cur_item_level && ! this._in_list )
			{
				for( var rep = 0; rep < this._cur_item_level; rep++ )
					this._sb.append( "</ul>" );
				this._cur_item_level = 0;
			}

			this._sb.append( "</" + tag + ">" );
		}

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function ParseSupportListsScanner_attribute( name, code )
{
	try {

		if ( this._in_paragraph )
		{
			this._p_attributes[ name ] = code;
			if ( name.toString().toLowerCase() == "style" )
			{
				if ( code.match( /mso\-list/im ) )
				{
					var levelnum = code.match( /level(\d+)\s/ );
					
					this._item_level = 1;
					if ( levelnum )
						this._item_level = parseInt( levelnum[1] );

					this._in_list = true;
				}
			}
		}
		else if ( ! this._inSupportLists )
			this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}



//---------------------------------------------------------------------------------------------------------
//  RemoveConditionalsScanner
//---------------------------------------------------------------------------------------------------------

// This scanner is specific to MS products.  The MS HTML output format is laced with 
// directives throughout.  What this scanner does is find the directives and remove them.  Additionally, if some
// interesting tags are within the directive then it will keep just that tag and remove the surrounding directive.
// It is important to note that there are often nested directives with MS documents.
//
// An example of the directives you might see is:
//
//  <![if !vml]><span style='mso-ignore:vglayout'><table cellpadding=0 cellspacing=0>
//   <tr><td width=63 height=0></td></tr>
//   <tr><td></td>
//    <td><![endif]><![if !excel]><img width=482 height=247 src="file:some_temp_file.gif">
//    <![endif]><![if !vml]></td>
//    <td width=31></td>
//   </tr>
//   <tr><td height=8></td></tr></table></span><![endif]>
// 
// When all we really want is:
//
// <img width=482 height=247 src="file:some_temp_file.gif">
//
// Passing allowTags = { img: 1 } will do that for you.
//
// One note on RemoveConditionalsScanner. The tags you specify with allowTags should be tags that have opens with
// no closes, like <img> or <br>.  The code is not built generically enough to handle open and close pairs.  If that
// is required then the class will need some redesigning.

function RemoveConditionalsScanner( allowTags )
{
	this._allowTags = allowTags;
}

// External methods

RemoveConditionalsScanner.prototype.scan = RemoveConditionalsScanner_scan;

// scanSourceString specific methods

RemoveConditionalsScanner.prototype.directive = RemoveConditionalsScanner_directive;
RemoveConditionalsScanner.prototype.text = RemoveConditionalsScanner_text;
RemoveConditionalsScanner.prototype.openTagBegin = RemoveConditionalsScanner_openTagBegin;
RemoveConditionalsScanner.prototype.openTagEnd = RemoveConditionalsScanner_openTagEnd;
RemoveConditionalsScanner.prototype.closeTagBegin = RemoveConditionalsScanner_closeTagBegin;
RemoveConditionalsScanner.prototype.attribute = RemoveConditionalsScanner_attribute;

function RemoveConditionalsScanner_scan( source, context )
{
	this._sb = context.createStringBuffer();
	this._inDirective = 0;
	this._exceptionTag = false;
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "RemoveConditionalsScanner bad scan" );

	return this._sb.get();
}

function RemoveConditionalsScanner_directive( code, offset )
{
	try {

		var testCode = code.toLowerCase();
		if( testCode.match( /^\<\!\[if/ ) )
		{
			this._inDirective++;
		}
		else if ( testCode.match( /^\<\!\-\-\[if/ ) )
		{
			;
		}
		else if ( this._inDirective )
		{
			if ( testCode.match( /^\<\!\[endif\]/ ) )
				this._inDirective--;
		}
		else
		{
			if ( code.match( /\<\!\-\-(\s*)startfragment(\s*)\-\->/i ) || code.match( /\<\!\-\-(\s*)endfragment(\s*)\-\->/i ) )
				this._sb.append( code );
		}

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveConditionalsScanner_text( code, offset )
{
	try {

		if ( this._inDirective == 0 )
			this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveConditionalsScanner_openTagBegin( tag, offset )
{
	try {

		if ( this._allowTags[ tag.toLowerCase() ] )
			this._exceptionTag = true;

		if ( this._inDirective == 0 || this._exceptionTag )
			this._sb.append( "<" + tag );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveConditionalsScanner_openTagEnd( tag, trailingFormat )
{
	try {

		if ( this._inDirective == 0 || this._exceptionTag )
			this._sb.append( ">" );

		this._exceptionTag = false;

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveConditionalsScanner_closeTagBegin( tag, offset )
{
	try {

		if ( this._inDirective == 0 || this._exceptionTag )
			this._sb.append( "</" + tag + ">" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveConditionalsScanner_attribute( name, code )
{
	try {

		if ( this._inDirective == 0 || this._exceptionTag )
			this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}





//---------------------------------------------------------------------------------------------------------
//  FindClippingScanner
//---------------------------------------------------------------------------------------------------------

// FindClippingScanner is designed to find the fragment of the clipboard in the HTML and return just that
// fragment with the minimum amount of support HTML around it.  As an example:
//
// <html><head>.... Cruft...</head><body>... More cruft ...
// <!---StartFragment---><p>Some small text</p><!---EndFragment--->... More cruft ...</body></html>
//
// Becomes:
//
// <html><body><!---StartFragment---><p>Some small text</p><!---EndFragment---></body></html>
//
// The idea is that removing all of the header, body pre, and body post information.
//
// In the case where there is no <!--StartFragment--> (where we are importing the contents of a file) then
// we look for the start and end <body> tags.

function FindClippingScanner( ) { }

// External methods

FindClippingScanner.prototype.scan = FindClippingScanner_scan;

// scanSourceString specific methods

FindClippingScanner.prototype.directive = FindClippingScanner_directive;
FindClippingScanner.prototype.text = FindClippingScanner_text;
FindClippingScanner.prototype.openTagBegin = FindClippingScanner_openTagBegin;
FindClippingScanner.prototype.openTagEnd = FindClippingScanner_openTagEnd;
FindClippingScanner.prototype.closeTagBegin = FindClippingScanner_closeTagBegin;
FindClippingScanner.prototype.attribute = FindClippingScanner_attribute;

function FindClippingScanner_scan( source, context )
{
	this._inClipping = false;
	this._firstTagException = false;
	this._findComment = false;
	this._clipTag = "";
	this._frag = "";
	this._scan_error = false;

	this._sb = context.createStringBuffer();

	if( source.match( /\<\!\-\-(\s*)startfragment(\s*)\-\->/i ) )
	{
		this._findComment = true;
		this._startText = /\<\!\-\-(\s*)startfragment(\s*)\-\->/i;
		this._endText = /\<\!\-\-(\s*)endfragment(\s*)\-\->/i;
	}
	else
	{
		// klo - Fix for #168813, copying and pasting from Hangul Wordian word processor.
		// They use a different comment to note the text fragment to copy:
		// <!-- Document Start -->
		// <!-- Document End -->
		if( source.match( /\<\!\-\-(\s*)document start(\s*)\-\->/i ) )
		{
			this._findComment = true;
			this._startText = /\<\!\-\-(\s*)document start(\s*)\-\->/i;
			this._endText = /\<\!\-\-(\s*)document end(\s*)\-\->/i;
		}
		else
			this._clipTag = "body";
	}

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "FindClippingScanner bad scan" );

	var text = this._sb.get();

	return "<html><body>" + text + "</body></html>";
}

function FindClippingScanner_directive( code, offset )
{
	try {

		var testCode = code.toLowerCase();

		if( this._findComment && this._startText.exec( testCode ) )
		{
			code = "<!--StartFragment-->";
			this._inClipping = true;
		}

		if( this._findComment && this._endText.exec( testCode ) )
		{
			code = "<!--EndFragment-->";
			this._sb.append( code );
			this._inClipping = false;
		}
		else if ( this._inClipping )
			this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function FindClippingScanner_text( code, offset )
{
	try {

		if ( this._inClipping )
			this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function FindClippingScanner_openTagBegin( tag, offset )
{
	try {

		if ( this._inClipping )
			this._frag = "<" + tag;

		if ( this._clipTag == tag && this._findComment == false )
		{
			this._inClipping = true;
			this._firstTagException = true;
		}

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function FindClippingScanner_openTagEnd( tag, trailingFormat )
{
	try {

		if ( this._inClipping && this._firstTagException == false )
		{
			this._frag += ">";
			this._sb.append( this._frag );
		}

		this._firstTagException = false;

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function FindClippingScanner_closeTagBegin( tag, offset )
{
	try {

		if ( this._clipTag == tag && this._findComment == false )
			this._inClipping = false;

		if ( this._inClipping )
			this._sb.append( "</" + tag + ">" );

		this._exceptionTag = false;

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function FindClippingScanner_attribute( name, code )
{
	try {

		if ( this._inClipping && this._firstTagException == false )
			this._frag += " " + name + "=\"" + handleAttributeText(code) + "\"";

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}



//---------------------------------------------------------------------------------------------------------
//  RemoveTagsScanner
//---------------------------------------------------------------------------------------------------------

// The RemoveTagScanner removes any tag listed in the associative array passed
// in with the constructor.  For example, to remove every <P> tag from the 
// following HTML:
//
//    <html><body><p>This</p><ul><li>is<li>a<li>test</ul></body><html>
//
// Yould would pass { p: 1 } into the constructor and invoke scan with the HTML.
// The return value would be:
//
//    <html><body><ul><li>is<li>a<li>test</ul></body><html>

function RemoveTagsScanner( tagLookup )
{
	this._tagLookup = tagLookup;
}

// External methods

RemoveTagsScanner.prototype.scan = RemoveTagsScanner_scan;

// scanSourceString specific methods

RemoveTagsScanner.prototype.directive = RemoveTagsScanner_directive;
RemoveTagsScanner.prototype.text = RemoveTagsScanner_text;
RemoveTagsScanner.prototype.openTagBegin = RemoveTagsScanner_openTagBegin;
RemoveTagsScanner.prototype.openTagEnd = RemoveTagsScanner_openTagEnd;
RemoveTagsScanner.prototype.closeTagBegin = RemoveTagsScanner_closeTagBegin;
RemoveTagsScanner.prototype.attribute = RemoveTagsScanner_attribute;

function RemoveTagsScanner_scan( source, context )
{
	this._sb = context.createStringBuffer();
	this._inGoodTag = true;
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "RemoveTagsScanner bad scan" );

	return this._sb.get();
}

function RemoveTagsScanner_directive( code, offset )
{
	try {

		this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveTagsScanner_text( code, offset )
{
	try {

		this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveTagsScanner_openTagBegin( tag, offset )
{
	try {

		if ( this._tagLookup[ tag.toLowerCase() ] )
			this._inGoodTag = true;
		else
		{
			this._inGoodTag = false;
			//--jcheng
			//this._sb.append( " " );
		}

		if ( this._inGoodTag )
			this._sb.append( "<" + tag );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveTagsScanner_openTagEnd( tag, trailingFormat )
{
	try {

		if ( this._inGoodTag )
			this._sb.append( ">" );

		this._inGoodTag = true;

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveTagsScanner_closeTagBegin( tag, offset )
{
	try {

		if ( this._tagLookup[ tag.toLowerCase() ] )
			this._sb.append( "</" + tag + ">" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveTagsScanner_attribute( name, code )
{
	try {

		if ( this._inGoodTag )
		{
			
			this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );
		}

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}



//---------------------------------------------------------------------------------------------------------
//  RemoveAttributesScanner
//---------------------------------------------------------------------------------------------------------

// The RemoveAttributesScanner removes attributes and styles that are NOT
// listed in the constructor.  For example with the HTML:
//
//    <html><body><p>This</p><ul><li>is<li>a<li>test</ul></body><html>
//
// Passing in { html: 1, body: 1, p: 1 } as the attributes would get you:
//
//    <html><body><p>This</p> is a test </body><html>
//
// The same thing applies to the attributes.  So on this html:
//
//     <p style="mso-reject: 1; font-family: Arial">
//
// Passing in { "font-family": 1 } as the style filter would get you:
//
//     <p style="font-family: Arial;">
//
 
function RemoveAttributesScanner( attributeLookup, styleLookup )
{
	this._attributeLookupMaster = attributeLookup;
	this._styleLookupMaster = styleLookup;
}

// External methods

RemoveAttributesScanner.prototype.scan = RemoveAttributesScanner_scan;

// scanSourceString specific methods

RemoveAttributesScanner.prototype.directive = RemoveAttributesScanner_directive;
RemoveAttributesScanner.prototype.text = RemoveAttributesScanner_text;
RemoveAttributesScanner.prototype.openTagBegin = RemoveAttributesScanner_openTagBegin;
RemoveAttributesScanner.prototype.openTagEnd = RemoveAttributesScanner_openTagEnd;
RemoveAttributesScanner.prototype.closeTagBegin = RemoveAttributesScanner_closeTagBegin;
RemoveAttributesScanner.prototype.attribute = RemoveAttributesScanner_attribute;

function RemoveAttributesScanner_scan( source, context )
{
	this._sb = context.createStringBuffer();
	this._tagName = null;
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "RemoveAttributesScanner bad scan" );

	return this._sb.get();
}

function RemoveAttributesScanner_directive( code, offset )
{
	try {

		this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveAttributesScanner_text( code, offset )
{
	try {

		this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveAttributesScanner_openTagBegin( tag, offset )
{
	try {

		this._tagName = tag.toLowerCase();

		this._attributeLookup = this._attributeLookupMaster[ this._tagName ];
		this._styleLookup = this._styleLookupMaster[ this._tagName ];

		if ( this._attributeLookup == null )
			this._attributeLookup = {};
		if ( this._styleLookup == null )
			this._styleLookup = {};

		this._sb.append( "<" + tag );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveAttributesScanner_openTagEnd( tag, trailingFormat )
{
	try {

		this._sb.append( ">" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveAttributesScanner_closeTagBegin( tag, offset )
{
	try {

		this._sb.append( "</" + tag + ">" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveAttributesScanner_attribute( name, code )
{
	try {

		if ( this._attributeLookup[ name.toLowerCase() ] )
		{
			if ( name.toLowerCase() == "style" )
			{
				var styles = Utils_ParseStyle( code );

				for( var style in styles )
				{
					if ( this._styleLookup[ style.toLowerCase() ] == null )
						styles = Utils_DeleteArrayItem( style, styles );
				}

				// Rebuild the style text

				code = Utils_BuildStyle( styles );
			}

			if ( code.length > 0 )
				this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );
		}

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}


//---------------------------------------------------------------------------------------------------------
//  RemoveOnlyTheseTagsScanner
//---------------------------------------------------------------------------------------------------------

// The RemoveOnlyTheseTagsScanner removes only the specified tags from the given HTML stream.
// So given this HTML:
//
//    <html><body><p>This is <b>bold</b> and <i>italic</i></p></body></html>
//
// With a filter of { b: 1, i: 1 }, you would get:
//
//    <html><body><p>This is bold  and italic </p></body></html>

function RemoveOnlyTheseTagsScanner( tagLookup )
{
	this._tagLookup = tagLookup;
}

// External methods

RemoveOnlyTheseTagsScanner.prototype.scan = RemoveOnlyTheseTagsScanner_scan;

// scanSourceString specific methods

RemoveOnlyTheseTagsScanner.prototype.directive = RemoveOnlyTheseTagsScanner_directive;
RemoveOnlyTheseTagsScanner.prototype.text = RemoveOnlyTheseTagsScanner_text;
RemoveOnlyTheseTagsScanner.prototype.openTagBegin = RemoveOnlyTheseTagsScanner_openTagBegin;
RemoveOnlyTheseTagsScanner.prototype.openTagEnd = RemoveOnlyTheseTagsScanner_openTagEnd;
RemoveOnlyTheseTagsScanner.prototype.closeTagBegin = RemoveOnlyTheseTagsScanner_closeTagBegin;
RemoveOnlyTheseTagsScanner.prototype.attribute = RemoveOnlyTheseTagsScanner_attribute;

function RemoveOnlyTheseTagsScanner_scan( source, context )
{
	this._sb = context.createStringBuffer();
	this._inGoodTag = true;
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "RemoveOnlyTheseTagsScanner bad scan" );

	return this._sb.get();
}

function RemoveOnlyTheseTagsScanner_directive( code, offset )
{
	try {

		this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveOnlyTheseTagsScanner_text( code, offset )
{
	try {

		this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveOnlyTheseTagsScanner_openTagBegin( tag, offset )
{
	try {

		if ( this._tagLookup[ tag.toLowerCase() ] )
			this._inGoodTag = false;
		else
			this._inGoodTag = true;

		if ( this._inGoodTag )
			this._sb.append( "<" + tag );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveOnlyTheseTagsScanner_openTagEnd( tag, trailingFormat )
{
	try {

		if ( this._inGoodTag )
			this._sb.append( ">" );

		this._inGoodTag = true;

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveOnlyTheseTagsScanner_closeTagBegin( tag, offset )
{
	try {

		if ( this._tagLookup[ tag.toLowerCase() ] == null )
			this._sb.append( "</" + tag + ">" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveOnlyTheseTagsScanner_attribute( name, code )
{
	try {

		if ( this._inGoodTag )
			this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}



//---------------------------------------------------------------------------------------------------------
//  RemoveOnlyTheseAttributesScanner
//---------------------------------------------------------------------------------------------------------

// The RemoveOnlyTheseAttributesScanner removes just the specified attributes from ANY
// tag in the provided HTML string.  As an example, this HTML:
//
//    <html><body><p class=MsoNormal>Hello</p></body></html>
//
// With a filter of { 'class': 1 } would result in this HTML:
//
//    <html><body><p>Hello </p></body></html>

function RemoveOnlyTheseAttributesScanner( attributeLookup )
{
	this._attributeLookup = attributeLookup;
}

// External methods

RemoveOnlyTheseAttributesScanner.prototype.scan = RemoveOnlyTheseAttributesScanner_scan;

// scanSourceString specific methods

RemoveOnlyTheseAttributesScanner.prototype.directive = RemoveOnlyTheseAttributesScanner_directive;
RemoveOnlyTheseAttributesScanner.prototype.text = RemoveOnlyTheseAttributesScanner_text;
RemoveOnlyTheseAttributesScanner.prototype.openTagBegin = RemoveOnlyTheseAttributesScanner_openTagBegin;
RemoveOnlyTheseAttributesScanner.prototype.openTagEnd = RemoveOnlyTheseAttributesScanner_openTagEnd;
RemoveOnlyTheseAttributesScanner.prototype.closeTagBegin = RemoveOnlyTheseAttributesScanner_closeTagBegin;
RemoveOnlyTheseAttributesScanner.prototype.attribute = RemoveOnlyTheseAttributesScanner_attribute;

function RemoveOnlyTheseAttributesScanner_scan( source, context )
{
	this._sb = context.createStringBuffer();
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "RemoveOnlyTheseAttributesScanner bad scan" );

	return this._sb.get();
}

function RemoveOnlyTheseAttributesScanner_directive( code, offset )
{
	try {

		this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveOnlyTheseAttributesScanner_text( code, offset )
{
	try {

		this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveOnlyTheseAttributesScanner_openTagBegin( tag, offset )
{
	try {

		this._sb.append( "<" + tag );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveOnlyTheseAttributesScanner_openTagEnd( tag, trailingFormat )
{
	try {

		this._sb.append( ">" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveOnlyTheseAttributesScanner_closeTagBegin( tag, offset )
{
	try {

		this._sb.append( "</" + tag + ">" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveOnlyTheseAttributesScanner_attribute( name, code )
{
	try {

		if ( this._attributeLookup[ name.toLowerCase() ] == null )
			this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}



//---------------------------------------------------------------------------------------------------------
//  MapTagNamesScanner
//---------------------------------------------------------------------------------------------------------

// The MapTagNamesScanner is a very specialized scanner.  It takes a regular expression
// and maps any tag name that matches that expression to the given name.  For example,
// given this HTML:
//
// <html><body><h1>Level One</h1><h2>Level Two</h2></body></html>
//
// With matchCriteria = new Regexp( /h[12]/ ) and outName = "p", you would get:
//
// <html><body><p>Level One </p><p>Level Two </p></body></html>
//

function MapTagNamesScanner( matchCriteria, outName )
{
	this._matchCriteria = matchCriteria;
	this._outName = outName;
}

// External methods

MapTagNamesScanner.prototype.scan = MapTagNamesScanner_scan;

// scanSourceString specific methods

MapTagNamesScanner.prototype.directive = MapTagNamesScanner_directive;
MapTagNamesScanner.prototype.text = MapTagNamesScanner_text;
MapTagNamesScanner.prototype.openTagBegin = MapTagNamesScanner_openTagBegin;
MapTagNamesScanner.prototype.openTagEnd = MapTagNamesScanner_openTagEnd;
MapTagNamesScanner.prototype.closeTagBegin = MapTagNamesScanner_closeTagBegin;
MapTagNamesScanner.prototype.attribute = MapTagNamesScanner_attribute;

function MapTagNamesScanner_scan( source, context )
{
	this._sb = context.createStringBuffer();
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "MapTagNamesScanner bad scan" );

	return this._sb.get();
}

function MapTagNamesScanner_directive( code, offset )
{
	try {

		this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function MapTagNamesScanner_text( code, offset )
{
	try {

		this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function MapTagNamesScanner_openTagBegin( tag, offset )
{
	try {

		if ( this._matchCriteria.exec( tag ) )
			tag = this._outName;
		this._sb.append( "<" + tag );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function MapTagNamesScanner_openTagEnd( tag, trailingFormat )
{
	try {

		this._sb.append( ">" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function MapTagNamesScanner_closeTagBegin( tag, offset )
{
	try {

		if ( this._matchCriteria.exec( tag ) )
			tag = this._outName;
		this._sb.append( "</" + tag + ">" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function MapTagNamesScanner_attribute( name, code )
{
	try {

		this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}


//---------------------------------------------------------------------------------------------------------
//  AddStylesScanner
//---------------------------------------------------------------------------------------------------------

// The AddStylesScanner is meant to do just that, add the specified styles to the tags
// of the type specified.  For example, with the following HTML:
//
// <html><body><p>Level One</p></body></html>
//
// Passing tagName = "p" and styles = { 'margin-top':0, 'margin-bottom':0 } you would get:
//
// <html><body><p style="margin-top:0;margin-bottom:0">Level One </p></body></html>
//

function AddStylesScanner( tagName, styles )
{
	this._tagName = tagName.toLowerCase();
	this._styles = styles;
}

// External methods

AddStylesScanner.prototype.scan = AddStylesScanner_scan;

// scanSourceString specific methods

AddStylesScanner.prototype.directive = AddStylesScanner_directive;
AddStylesScanner.prototype.text = AddStylesScanner_text;
AddStylesScanner.prototype.openTagBegin = AddStylesScanner_openTagBegin;
AddStylesScanner.prototype.openTagEnd = AddStylesScanner_openTagEnd;
AddStylesScanner.prototype.closeTagBegin = AddStylesScanner_closeTagBegin;
AddStylesScanner.prototype.attribute = AddStylesScanner_attribute;

function AddStylesScanner_scan( source, context )
{
	this._sb = context.createStringBuffer();
	this._fixup = false;
	this._foundStyle = false;
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "AddStylesScanner bad scan" );

	return this._sb.get();
}

function AddStylesScanner_directive( code, offset )
{
	try {

		this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function AddStylesScanner_text( code, offset )
{
	try {

		this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function AddStylesScanner_openTagBegin( tag, offset )
{
	try {

		if ( this._tagName == tag.toLowerCase() )
			this._fixup = true;

		this._foundStyle = false;

		this._sb.append( "<" + tag );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function AddStylesScanner_openTagEnd( tag, trailingFormat )
{
	try {

		if ( this._fixup == true && this._foundStyle == false )
		{
			var code = Utils_BuildStyle( this._styles );

			this._sb.append( " style=\"" + code + "\"" );
		}

		this._sb.append( ">" );

		this._fixup = false;

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function AddStylesScanner_closeTagBegin( tag, offset )
{
	try {

		this._sb.append( "</" + tag + ">" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function AddStylesScanner_attribute( name, code )
{
	try {

		if ( this._fixup && name.toLowerCase() == "style" )
		{
			var styles = Utils_ParseStyle( code );

			for( var style in this._styles )
				styles[ style ] = this._styles[ style ];

			code = Utils_BuildStyle( styles );

			this._foundStyle = true;
		}

		if ( code.length > 0 )
			this._sb.append( " " + name + "=\"" + handleAttributeText(code) + "\"" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}




//---------------------------------------------------------------------------------------------------------
//  StructureScanner
//---------------------------------------------------------------------------------------------------------

// The StructureScanner is the most complex of all of the scanners.  Given proper HTML it will create an
// internal representation of the HTML using associative arrays and arrays, and then reconstruct the HTML
// while calling member functions (which can be overidden) at key points.
//
// Given this HTML:
//
// <html><body><p>This is a test</p></body></html>
//
// The internal representation would be:
//
// { type: 'root',
//   children: [
//     { type: 'tag',
//       tag: 'html',
//       attributes: {},
//       children: [
//         { type: 'tag',
//           tag: 'body',
//           attributes: {},
//           children: [
//             { type: 'tag',
//               tag: 'p',
//               attributes: {},
//               children: [
//                 { type: 'text',
//                   text: 'This is a test'
//                 }
//             }
//         }
//     }
// }

// The overide methods descibed below are called after the structure has been created, during
// the phase where the new HTML text is created.

// -- StructureScanner.prototype.inspectTag( tag )
//
//    tag - The root of this tag structure
//
// This is called first time the tag is seen during the creation phase.  Here you can alter
// the tag before it is sent to the output.  You can change the tag name, remove or add attributes,
// and alter the children from here on down.

// -- StructureScanner.prototype.startTag( tag )
//
//    tag - The root of this tag structure
//
// For tags (not text) start tag is created before the child nodes are turned into HTML.

// -- StructureScanner.prototype.createTag( tag, attributes, closed )
//
//    tag - The tag name
//    attributes - The associative array of attributes
//    closed - true if the tag was both opened and closed officially (e.g. <p> and </p>)
//
// This is called to create the HTML for the tag.  This method does not need to handle the child
// nodes, those are handled by the structure parser (if you want to alter those see inspectTag.)
// The output from this should either be null (which means that StructureScanner should handle
// the tag) or an associative array with postfix and prefix attributes.  The postfix is how the
// tag should end and prefix is how the tag should start.

// -- StructureScanner.prototype.endTag( tag )
//
//    tag - The root of this tag structure
//
// The opposite number of start tag.

// -- StructureScanner.prototype.finalizeTag( tag, attributes, closed, childHTML )
//
//    tag - The tag name
//    attributes - The associative array of attributes
//    closed - true if the tag was both opened and closed officially (e.g. <p> and </p>)
//    childHTML - The finalized HTML of all of the children
//
// This is called as a final approval of the tag.  If false is returned then the tag (and all of
// it's children) are not added into the HTML stream.

function StructureScanner( ) { }

// External methods

StructureScanner.prototype.scan = StructureScanner_scan;

// scanSourceString methods

StructureScanner.prototype.directive = StructureScanner_directive;
StructureScanner.prototype.text = StructureScanner_text;
StructureScanner.prototype.openTagBegin = StructureScanner_openTagBegin;
StructureScanner.prototype.closeTagBegin = StructureScanner_closeTagBegin;
StructureScanner.prototype.attribute = StructureScanner_attribute;

// Internal methods to build the structure

StructureScanner.prototype.addTextChild = StructureScanner_addTextChild;
StructureScanner.prototype.addTagChild = StructureScanner_addTagChild;
StructureScanner.prototype.addAttribute = StructureScanner_addAttribute;
StructureScanner.prototype.finishTag = StructureScanner_finishTag;
StructureScanner.prototype.buildHTML = StructureScanner_buildHTML;

// Methods to overide

StructureScanner.prototype.inspectTag = StructureScanner_inspectTag;
StructureScanner.prototype.startTag = StructureScanner_startTag;
StructureScanner.prototype.createTag = StructureScanner_createTag;
StructureScanner.prototype.finalizeTag = StructureScanner_finalizeTag;
StructureScanner.prototype.endTag = StructureScanner_endTag;

function StructureScanner_addTextChild( text )
{
	this._curTag.children.push( { type: "text", text: text } );
}

function StructureScanner_addTagChild( tag )
{
	tag = tag.toLowerCase();

	var node = { type: "tag", tag: tag, attributes: {}, children: [], closed: false };
	this._curTag.children.push( node );
	this._curTag = node;
	this._opStack.push( node );
}

function StructureScanner_addAttribute( name, value )
{
	name = name.toLowerCase();

	this._curTag.attributes[ name ] = value;
}

function StructureScanner_finishTag( tag )
{
	tag = tag.toLowerCase();

	var aTag = this._opStack.pop();

	while( aTag != null )
	{
		if ( aTag.tag == tag )
		{
			aTag.closed = true;
			break;
		}
		aTag = this._opStack.pop();
	}

	this._curTag = this._opStack[ this._opStack.length - 1 ];
}

function StructureScanner_buildHTML( tag )
{
	this.inspectTag( tag );

	var prefix = "";
	var postfix = "";
	var childHTML = "";

	if ( tag.type == "text" )
	{
		prefix = tag.text;
	}
	else
	{
		this.startTag( tag );
	
		if ( tag.type == "tag" )
		{
			if ( tag.tag == "span" && tag.children.length == 0 )
			{
				tag.children.push( { type: "text", text: " " } );
			}

			var retVal = this.createTag( tag.tag, tag.attributes, tag.closed );

			if ( retVal == null )
				retVal = StructureScanner_createTag( tag.tag, tag.attributes, tag.closed );

			prefix = retVal.prefix;
			postfix = retVal.postfix;
		}

		for( var index in tag.children )
			childHTML += this.buildHTML( tag.children[ index ] );
			
		this.endTag( tag );

		if ( this.finalizeTag( tag.tag, tag.attributes, tag.closed, childHTML ) == false )
		{
			prefix = "";
			childHTML = " ";
			postfix = "";
		}
	}

	return prefix + childHTML + postfix;
}

function StructureScanner_finalizeTag( tag ) { return true; }

function StructureScanner_startTag( tag ) { }

function StructureScanner_endTag( tag ) { }

function StructureScanner_inspectTag( tag ) { return tag; }

function StructureScanner_createTag( tag, attributes, closed )
{
	var prefix = "";
	var postfix = "";

	prefix = "<" + tag;
	for( var key in attributes )
	{
		if ( attributes[ key ] != null )
			prefix += " " + key + "=\"" + handleAttributeText(attributes[ key ]) + "\"";
	}
	prefix += ">";

	if ( closed )
		postfix = "</" + tag + ">";

	return { prefix: prefix, postfix: postfix };
}


function StructureScanner_scan( source )
{
	var rootTag = { type: "root", children: [] };
	this._curTag = rootTag;
	this._opStack = [ this._curTag ];
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "StructureScanner bad scan" );

	var html = "";

	html = this.buildHTML( rootTag );

	return html;
}

function StructureScanner_directive( code, offset )
{
	try {

		this.addTextChild( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function StructureScanner_text( code, offset )
{
	try {

		this.addTextChild( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function StructureScanner_openTagBegin( tag, offset )
{
	try {

		this.addTagChild( tag );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function StructureScanner_closeTagBegin( tag, offset )
{
	try {

		this.finishTag( tag );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function StructureScanner_attribute( name, code )
{
	try {

		this.addAttribute( name, code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}




//---------------------------------------------------------------------------------------------------------
//  RemoveHiddenSpansScanner
//---------------------------------------------------------------------------------------------------------

// This scanner is specific to MS products.  This removes any DIV tags within the document.

function RemoveHiddenSpansScanner( )
{
}

// External methods

RemoveHiddenSpansScanner.prototype.scan = RemoveHiddenSpansScanner_scan;

// scanSourceString specific methods

RemoveHiddenSpansScanner.prototype.directive = RemoveHiddenSpansScanner_directive;
RemoveHiddenSpansScanner.prototype.text = RemoveHiddenSpansScanner_text;
RemoveHiddenSpansScanner.prototype.openTagBegin = RemoveHiddenSpansScanner_openTagBegin;
RemoveHiddenSpansScanner.prototype.openTagEnd = RemoveHiddenSpansScanner_openTagEnd;
RemoveHiddenSpansScanner.prototype.closeTagBegin = RemoveHiddenSpansScanner_closeTagBegin;
RemoveHiddenSpansScanner.prototype.attribute = RemoveHiddenSpansScanner_attribute;

function RemoveHiddenSpansScanner_scan( source, context )
{
	this._sb = context.createStringBuffer();
	this._attributes = [];
	this._hiddenSpanDepth = 0;
	this._tag = "";
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "RemoveHiddenSpansScanner bad scan" );

	return this._sb.get();
}

function RemoveHiddenSpansScanner_directive( code, offset )
{
	try {

		if ( this._hiddenSpanDepth == 0 )
			this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveHiddenSpansScanner_text( code, offset )
{
	try {

		if ( this._hiddenSpanDepth == 0 )
			this._sb.append( code );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveHiddenSpansScanner_openTagBegin( tag, offset )
{
	try {

		this._tag = tag;
		this._attributes = [];

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveHiddenSpansScanner_openTagEnd( tag, trailingFormat )
{
	try {

		if ( this._hiddenSpanDepth == 0 )
		{
			if ( ( this._tag == "span" && this._attributes[ "style" ] ) )
			{
				if ( this._attributes[ "style" ].match( /display:none/ ) )
				{
					this._hiddenSpanDepth = 1;
				}
			}

			if ( ( this._tag == "p" && this._attributes[ "class" ] ) )
			{
				if ( this._attributes[ "class" ].match( /MsoCommentText/ ) )
				{
					this._hiddenSpanDepth = 1;
				}
			}
			
			if ( ( this._tag == "span" && this._attributes[ "class" ] ) )
			{
				if ( this._attributes[ "class" ].match( /MsoCommentReference/ ) )
				{
					this._hiddenSpanDepth = 1;
				}
			}

			if ( this._hiddenSpanDepth == 0 )
			{
				this._sb.append( "<" + this._tag );
				for( key in this._attributes )
				{
					this._sb.append( " " + key + "=\"" + handleAttributeText(this._attributes[ key ]) + "\"" );
				}
				this._sb.append( ">" );
			}
		}
		else
			this._hiddenSpanDepth++;


	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveHiddenSpansScanner_attribute( name, code )
{
	try {

		if ( this._hiddenSpanDepth == 0 )
			this._attributes[ name ] = code;

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function RemoveHiddenSpansScanner_closeTagBegin( tag, offset )
{
	try {

		if ( this._hiddenSpanDepth == 0 )
		{
			this._sb.append( "</" + tag + ">" );
		}
		else
		{
			this._hiddenSpanDepth--;
		}

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}





//---------------------------------------------------------------------------------------------------------
//  DebugScanner
//---------------------------------------------------------------------------------------------------------

function DebugScanner( )
{
}

// External methods

DebugScanner.prototype.scan = DebugScanner_scan;

// scanSourceString specific methods

DebugScanner.prototype.directive = DebugScanner_directive;
DebugScanner.prototype.text = DebugScanner_text;
DebugScanner.prototype.openTagBegin = DebugScanner_openTagBegin;
DebugScanner.prototype.openTagEnd = DebugScanner_openTagEnd;
DebugScanner.prototype.closeTagBegin = DebugScanner_closeTagBegin;
DebugScanner.prototype.attribute = DebugScanner_attribute;

function DebugScanner_scan( source )
{
	this._exceptionTag = false;
	this._scan_error = false;

	dw.scanSourceString( source, this );

	if ( this._scan_error )
		throw( "DebugScanner bad scan" );
}

function DebugScanner_directive( code, offset )
{
	try {

		alert( "directive( " + code + ", " + offset + " )" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function DebugScanner_text( code, offset )
{
	try {

		alert( "text( " + code + ", " + offset + " )" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function DebugScanner_openTagBegin( tag, offset )
{
	try {

		alert( "openTagBegin( " + tag + ", " + offset + " )" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function DebugScanner_openTagEnd( tag, trailingFormat )
{
	try {

		alert( "openTagEnd( " + tag + ", " + trailingFormat + " )" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function DebugScanner_closeTagBegin( tag, offset )
{
	try {

		alert( "closeTagBegin( " + tag + ", " + offset + " )" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

function DebugScanner_attribute( name, code )
{
	try {
	
		alert( "attribute( " + name + ", " + handleAttributeText(code) + " )" );

	} catch( e ) {

		this._scan_error = true;
		return false;

	}

	return true;
}

