//
//  XTOutputTextParserHtml.m
//  TadsTerp
//
//  Created by Rune Berg on 08/04/14.
//  Copyright (c) 2014 Rune Berg. All rights reserved.
//

#import "XTOutputTextParserHtml.h"
#import "XTPrefs.h"
#import "XTLogger.h"
#import "XTHtmlCharEntityParser.h"
#import "XTHtmlTag.h"
#import "XTHtmlTagBr.h"
#import "XTHtmlTagP.h"
#import "XTHtmlTagTab.h"
#import "XTHtmlTagTitle.h"
#import "XTHtmlTagEm.h"
#import "XTHtmlTagQ.h"
#import "XTHtmlTagH1.h"
#import "XTHtmlTagH2.h"
#import "XTHtmlTagH3.h"
#import "XTHtmlTagH4.h"
#import "XTHtmlTagI.h"
#import "XTHtmlTagA.h"
#import "XTHtmlTagB.h"
#import "XTHtmlTagU.h"
#import "XTHtmlTagQuestionMarkT2.h"
#import "XTHtmlTagQuestionMarkT3.h"
#import "XTHtmlTagStrong.h"
#import "XTHtmlTagCenter.h"
#import "XTHtmlTagOl.h"
#import "XTHtmlTagLi.h"
#import "XTHtmlTagHr.h"
#import "XTHtmlTagNoop.h"
#import "XTHtmlTagAboutBox.h"
#import "XTHtmlTagBanner.h"
#import "XTHtmlTagTt.h"
#import "XTHtmlTagDiv.h"
#import "XTHtmlTagUl.h"
#import "XTHtmlTagTable.h"
#import "XTHtmlTagTr.h"
#import "XTHtmlTagTh.h"
#import "XTHtmlTagTd.h"
#import "XTHtmlTagBlockQuote.h"
#import "XTHtmlTagBq.h"
#import "XTHtmlTagCite.h"
#import "XTHtmlTagFont.h"
#import "XTHtmlTagCode.h"
#import "XTHtmlTagPre.h"
#import "XTHtmlTagImg.h"
#import "XTHtmlTagSmall.h"
#import "XTHtmlTagBig.h"
#import "XTHtmlTagPseudoTest.h"
#import "XTHtmlWhitespace.h"
#import "XTHtmlQuotedSpace.h"
#import "XTHtmlNonbreakingSpace.h"
#import "XTHtmlSpecialSpace.h"
#import "XTAllocDeallocCounter.h"


@interface XTOutputTextParserHtml ()
// Class extension: private state of the HTML parser.

// Parser for HTML character entities; fed regular text and attribute values.
@property XTHtmlCharEntityParser *charEntityParser;

// Tag classes that makeTag can instantiate; searched in order, by name.
@property NSArray *htmlTagDefs;
// Names of tags that are recognized but turned into XTHtmlTagNoop.
@property NSArray *ignoredTagNames;

// Alphanumerics plus '_' and '?' (see init).
@property NSCharacterSet *tagNameCharSet;
// Copy of tagNameCharSet (see init).
@property NSCharacterSet *attributeNameCharSet;
// Whitespace and newlines; used while inside a tag.
@property NSCharacterSet *whitespaceCharSet;
@property NSCharacterSet *editableWhitespaceCharSet; // ' ', \n, \t - whitespace outside tags
@property NSCharacterSet *specialSpaceCharSet; // typographical spaces: ensp, emsp, ... (U+2002..U+200A)
// Characters that are not legal in an unquoted attribute value: " ' ` = < >
@property NSCharacterSet *illegalInUnuotedAttributeValueCharSet;

// Regular (non-whitespace, non-tag) text collected so far.
@property NSMutableString *regularTextBuffer;
// Run of editable whitespace collected so far.
@property NSMutableString *whitespaceBuffer;
@property NSMutableString *tagTextBuffer; // entire tag text: brackets, name, attrs
// Name of the tag currently being parsed.
@property NSMutableString *tagNameBuffer;
// YES when a '/' or '\' was seen directly after the tag start.
@property BOOL closing;
// Quote character (" or ') that opened the current quoted attribute value.
@property unichar attributeQuoteChar;
// Name and raw (unexpanded) value of the attribute currently being parsed.
@property NSMutableString *attributeNameBuffer;
@property NSMutableString *attributeValueBuffer;
// Attributes completed so far for the current tag: lowercased name -> expanded value.
@property NSMutableDictionary *attributes;

@end


@implementation XTOutputTextParserHtml

// Class-wide logger, assigned in +initialize.
// NOTE(review): presumably picked up by the XT_* logging macros, which are defined elsewhere - confirm.
static XTLogger* logger;

// BUFFER_* macros append the current character to one of the parser's buffers.
// They require a unichar variable named `ch` to be in scope where they are used.
// CLEAR_* macros empty the corresponding buffer.
#define BUFFER_REGULAR_TEXT [self.regularTextBuffer appendString:[NSString stringWithCharacters:&ch length:1]]
#define CLEAR_REGULAR_TEXT [self.regularTextBuffer setString:@""]

#define BUFFER_WHITESPACE [self.whitespaceBuffer appendString:[NSString stringWithCharacters:&ch length:1]]
#define CLEAR_WHITESPACE [self.whitespaceBuffer setString:@""]

#define CLEAR_TAG_TEXT [self.tagTextBuffer setString:@""]
#define BUFFER_TAG_TEXT [self.tagTextBuffer appendString:[NSString stringWithCharacters:&ch length:1]]

#define CLEAR_TAG_NAME [self.tagNameBuffer setString:@""]
#define BUFFER_TAG_NAME [self.tagNameBuffer appendString:[NSString stringWithCharacters:&ch length:1]]

#define CLEAR_ATTRIBUTE_NAME [self.attributeNameBuffer setString:@""]
#define BUFFER_ATTRIBUTE_NAME [self.attributeNameBuffer appendString:[NSString stringWithCharacters:&ch length:1]]
#define CLEAR_ATTRIBUTE_VALUE [self.attributeValueBuffer setString:@""]
#define BUFFER_ATTRIBUTE_VALUE [self.attributeValueBuffer appendString:[NSString stringWithCharacters:&ch length:1]]

// Empties the dictionary of attributes collected for the current tag.
#define CLEAR_ATTRIBUTES [self.attributes removeAllObjects]
// Stores the buffered attribute name/value pair via -addAttribute.
// Unlike the other macros, the definition already ends with a semicolon, so
// "ADD_ATTRIBUTE;" expands to an extra empty statement.
#define ADD_ATTRIBUTE [self addAttribute];

// The ADD_* macros below append finished elements to a mutable array that must
// be in scope under the name `res`.

// Emits the buffered regular text (if any) and empties the buffer.
// Expands to two statements without surrounding braces: use only inside a braced block.
#define ADD_TEXT_ELEMENT \
	[self addRegularTextElement:self.regularTextBuffer toArray:res]; \
	CLEAR_REGULAR_TEXT

// Emits the buffered whitespace as one XTHtmlWhitespace element and empties the buffer.
#define ADD_WHITESPACE_ELEMENT \
	{ \
	NSString *wsText = [NSString stringWithString:self.whitespaceBuffer]; \
	XTHtmlWhitespace *whitespace = [XTHtmlWhitespace whitespaceWithText:wsText]; \
	[res addObject:whitespace]; \
	CLEAR_WHITESPACE; \
	}

// Builds a tag from the buffered name/attributes and emits it. When makeTag
// returns nil, the raw tag text is emitted as regular text if the
// printBrokenHtmlMarkup preference is set, and a warning is logged.
// All tag-related buffers and the closing flag are reset afterwards.
// Declares a local named `tag` without surrounding braces: use at most once per
// braced block.
#define ADD_TAG_ELEMENT \
	XTHtmlTag *tag = [self makeTag]; \
	if (tag != nil) { \
		[res addObject:tag]; \
	} else { \
		/* TODO consider a new type of formatting element for this case, for easier styling/formatting */ \
		if (self.tagTextBuffer.length >= 1) { \
			XTPrefs *prefs = [XTPrefs prefs]; \
			if (prefs.printBrokenHtmlMarkup.boolValue) { \
				[self addRegularTextElement:self.tagTextBuffer toArray:res]; \
			}\
			XT_WARN_1(@"bad tag text \"%@\"", self.tagTextBuffer); \
		} \
	} \
	CLEAR_TAG_TEXT; \
	CLEAR_TAG_NAME; \
	CLEAR_ATTRIBUTE_NAME; \
	CLEAR_ATTRIBUTE_VALUE; \
	CLEAR_ATTRIBUTES; \
	self.closing = NO;

// Hard-flushes the char entity parser and emits whatever text it returns,
// if that text is non-empty.
#define ADD_INCOMPLETE_CHAR_ENTITY \
	{ \
		NSString *entityText = [self.charEntityParser hardFlush]; \
		if (entityText.length >= 1) { \
			[res addObject:entityText]; \
		} \
	}

// The HANDLE_* macros below require `ch` (current character) and, for the
// logging ones, `index` (its position in the input) to be in scope.

// Only logs a warning; the caller is responsible for switching to
// XT_HTML_IN_MISFORMED_TAG.
#define HANDLE_START_OF_MISFORMED_TAG \
	XT_WARN_3(@"(state %d) unexpected character '%C' at index %lu - handling as misformed tag until next '>'", self.state, ch, index);

// Only logs a warning; parsing of the tag continues.
#define HANDLE_UNEXPECTED_CHAR_IN_TAG_NAME \
	XT_WARN_3(@"(state %d) unexpected character '%C' at index %lu - continuing tag...", self.state, ch, index);

// Only logs a warning; parsing of the tag continues.
#define HANDLE_UNEXPECTED_CHAR_IN_TAG_ATTR \
	XT_WARN_3(@"(state %d) unexpected character '%C' at index %lu - continuing tag...", self.state, ch, index);

// Ends a misformed tag: emits the raw tag text as regular text if the
// printBrokenHtmlMarkup preference is set, then clears every buffer and returns
// the parser to XT_HTML_INITIAL. Requires `res` in scope.
// Declares a local named `prefs` without surrounding braces: use only inside a
// braced block.
#define HANDLE_END_OF_MISFORMED_TAG \
	XTPrefs *prefs = [XTPrefs prefs]; \
	if (prefs.printBrokenHtmlMarkup.boolValue) { \
		/* TODO consider a new type of formatting element for this case, for easier styling/formatting */ \
		[self addRegularTextElement:self.tagTextBuffer toArray:res]; \
	} \
	CLEAR_REGULAR_TEXT; \
	CLEAR_WHITESPACE; \
	CLEAR_TAG_TEXT; \
	CLEAR_TAG_NAME; \
	CLEAR_ATTRIBUTE_NAME; \
	CLEAR_ATTRIBUTE_VALUE; \
	CLEAR_ATTRIBUTES; \
	self.state = XT_HTML_INITIAL;

// Creates the class-wide logger.
//
// The runtime also sends +initialize to this class on behalf of any subclass
// that does not implement the method itself, so the work is guarded to run
// for this exact class only.
+ (void)initialize
{
	if (self == [XTOutputTextParserHtml class]) {
		logger = [XTLogger loggerForClass:[XTOutputTextParserHtml class]];
	}
}

// NOTE(review): these macros are defined outside this file (presumably in
// XTAllocDeallocCounter.h); judging by their names they override alloc/dealloc
// to count live instances - confirm against the macro definitions.
OVERRIDE_ALLOC_FOR_COUNTER

OVERRIDE_DEALLOC_FOR_COUNTER

// Creates a parser in state XT_HTML_INITIAL with empty buffers, the tag
// definition tables, and the character sets used by the state machine.
- (id)init
{
	self = [super init];
	if (self == nil) {
		return nil;
	}

	[self initTagDefs];

	// Tag names: alphanumerics and digits, plus '_' and '?'.
	NSMutableCharacterSet *nameChars = [NSMutableCharacterSet alphanumericCharacterSet];
	[nameChars formUnionWithCharacterSet:[NSCharacterSet decimalDigitCharacterSet]];
	[nameChars addCharactersInString:@"_?"];
	_tagNameCharSet = nameChars;

	// Attribute names use the same characters as tag names.
	_attributeNameCharSet = [_tagNameCharSet mutableCopy];

	_whitespaceCharSet = [NSCharacterSet whitespaceAndNewlineCharacterSet];
	_charEntityParser = [XTHtmlCharEntityParser new];
	_editableWhitespaceCharSet = [NSCharacterSet characterSetWithCharactersInString:@" \t\n"];
	//TODO unit test 2004...
	_specialSpaceCharSet = [NSCharacterSet characterSetWithCharactersInString:@"\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a"];
	_illegalInUnuotedAttributeValueCharSet = [NSCharacterSet characterSetWithCharactersInString:@"\"'`=<>"];

	// Parsing state
	_state = XT_HTML_INITIAL;
	_closing = NO;

	// Buffers
	_regularTextBuffer = [NSMutableString stringWithCapacity:500];
	_whitespaceBuffer = [NSMutableString stringWithCapacity:500];
	_tagTextBuffer = [NSMutableString stringWithCapacity:100];
	_tagNameBuffer = [NSMutableString stringWithCapacity:100];
	_attributeNameBuffer = [NSMutableString stringWithCapacity:100];
	_attributeValueBuffer = [NSMutableString stringWithCapacity:100];
	_attributes = [NSMutableDictionary dictionary];

	return self;
}

// Intentionally empty: this parser currently has nothing to release or
// unregister at teardown.
- (void)teardown
{
}

// Discards all partially parsed input: empties every buffer, resets the char
// entity parser, and puts the state machine back in XT_HTML_INITIAL.
- (void)resetForNextCommand
{
	// Text outside tags
	CLEAR_REGULAR_TEXT;
	CLEAR_WHITESPACE;
	[self.charEntityParser reset];

	// Tag in progress
	CLEAR_TAG_TEXT;
	CLEAR_TAG_NAME;
	CLEAR_ATTRIBUTE_NAME;
	CLEAR_ATTRIBUTE_VALUE;
	CLEAR_ATTRIBUTES;
	self.closing = NO;

	self.state = XT_HTML_INITIAL;
}

// Runs `string` through the HTML state machine, one unichar at a time, and
// returns the elements completed by this input: XTHtmlWhitespace objects,
// XTHtmlTag objects, and the text / space elements produced by
// addRegularTextElement:toArray:.
//
// self.state and the buffers are not reset on entry, so text or a tag that is
// incomplete at the end of `string` is carried over to the next call (or to
// flush / hardFlush).
//
// The macros used below rely on the local names `ch`, `index` and `res`.
- (NSArray *)parse:(NSString *)string
{
	XT_DEF_SELNAME;
	XT_TRACE_1(@"\"%@\"", string);
	
	NSMutableArray *res = [NSMutableArray arrayWithCapacity:5];
	
	for (NSUInteger index = 0; index < string.length; index++) {
		const unichar ch = [string characterAtIndex:index];
		switch (self.state) {
			// Nothing buffered: classify the first character.
			case XT_HTML_INITIAL:
				if ([self.editableWhitespaceCharSet characterIsMember:ch]) {
					BUFFER_WHITESPACE;
					self.state = XT_HTML_IN_WHITESPACE;
				} else if (ch == '<') {
					BUFFER_TAG_TEXT;
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					BUFFER_REGULAR_TEXT;
					self.state = XT_HTML_IN_TEXT;
				}
				break;
			// In a run of ' ', \t, \n: the run is emitted as one element when it ends.
			case XT_HTML_IN_WHITESPACE:
				if ([self.editableWhitespaceCharSet characterIsMember:ch]) {
					BUFFER_WHITESPACE;
					// self.state = XT_HTML_IN_WHITESPACE;
				} else if (ch == '<') {
					ADD_WHITESPACE_ELEMENT;
					BUFFER_TAG_TEXT;
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					ADD_WHITESPACE_ELEMENT;
					BUFFER_REGULAR_TEXT;
					self.state = XT_HTML_IN_TEXT;
				}
				break;
			// In regular text: whitespace or '<' ends the text, and any char
			// entity still pending in the entity parser is flushed out as text.
			case XT_HTML_IN_TEXT:
				if ([self.editableWhitespaceCharSet characterIsMember:ch]) {
					ADD_TEXT_ELEMENT;
					ADD_INCOMPLETE_CHAR_ENTITY;
					BUFFER_WHITESPACE;
					self.state = XT_HTML_IN_WHITESPACE;
				} else if (ch == '<') {
					ADD_TEXT_ELEMENT;
					ADD_INCOMPLETE_CHAR_ENTITY;
					BUFFER_TAG_TEXT;
					self.state = XT_HTML_AFTER_TAG_START;
				} else {
					BUFFER_REGULAR_TEXT;
					//self.state = XT_HTML_IN_TEXT;
				}
				break;
			// Directly after '<'. From here on every character is also recorded
			// in tagTextBuffer so a broken tag can be printed verbatim.
			case XT_HTML_AFTER_TAG_START:
				BUFFER_TAG_TEXT;
				if ([self.tagNameCharSet characterIsMember:ch]) {
					BUFFER_TAG_NAME;
					self.state = XT_HTML_IN_TAG_NAME;
				} else if (ch == '/' || ch == '\\') {
					// Both '/' and '\' are accepted as the closing-tag marker.
					self.state = XT_HTML_IN_TAG_NAME;
					self.closing = YES;
				} else if (ch == '.') {
					BUFFER_TAG_NAME; // yup, we consider '.' part of the tag name
					self.state = XT_HTML_AFTER_PSEUDOTAG_START;
				} else {
					HANDLE_START_OF_MISFORMED_TAG;
					self.state = XT_HTML_IN_MISFORMED_TAG;
				}
				break;
			// Directly after "<.": same as after '<', except that a second '.' is not allowed.
			case XT_HTML_AFTER_PSEUDOTAG_START:
				BUFFER_TAG_TEXT;
				if ([self.tagNameCharSet characterIsMember:ch]) {
					BUFFER_TAG_NAME;
					self.state = XT_HTML_IN_TAG_NAME;
				} else if (ch == '/' || ch == '\\') {
					self.state = XT_HTML_IN_TAG_NAME;
					self.closing = YES;
				} else {
					HANDLE_START_OF_MISFORMED_TAG;
					self.state = XT_HTML_IN_MISFORMED_TAG;
				}
				break;
			// Collecting the tag name.
			case XT_HTML_IN_TAG_NAME:
				BUFFER_TAG_TEXT;
				if ([self.tagNameCharSet characterIsMember:ch]) {
					BUFFER_TAG_NAME;
					// state = IN_TAG_NAME;
				} else if ([self.whitespaceCharSet characterIsMember:ch]) {
					self.state = XT_HTML_AFTER_TAG_NAME;
				} else if (ch == '>') {
					ADD_TAG_ELEMENT;
					self.state = XT_HTML_INITIAL;
				} else if (ch == '/' || ch == '\\') {
					self.state = XT_HTML_AT_CLOSING_SLASH;
				} else if (ch == '=') {
					// A stray '=' is logged and left out of the tag name; the state is unchanged.
					HANDLE_UNEXPECTED_CHAR_IN_TAG_NAME; // but don't buffer it
				} else {
					HANDLE_START_OF_MISFORMED_TAG;
					self.state = XT_HTML_IN_MISFORMED_TAG;
				}
				break;
			// Between the tag name / a finished attribute and the next attribute or tag end.
			case XT_HTML_AFTER_TAG_NAME:
				BUFFER_TAG_TEXT;
				if ([self.whitespaceCharSet characterIsMember:ch]) {
					// state == AFTER_TAG_NAME;
				} else if (ch == '/' || ch == '\\') {
					self.state = XT_HTML_AT_CLOSING_SLASH;
				} else if (ch == '>') {
					ADD_TAG_ELEMENT;
					self.state = XT_HTML_INITIAL;
				} else if ([self.attributeNameCharSet characterIsMember:ch]) {
					CLEAR_ATTRIBUTE_NAME;
					BUFFER_ATTRIBUTE_NAME;
					self.state = XT_HTML_IN_ATTRIBUTE_NAME;
				} else {
					// Unexpected character: logged, but still taken as the start of an attribute name.
					HANDLE_UNEXPECTED_CHAR_IN_TAG_ATTR;
					CLEAR_ATTRIBUTE_NAME;
					BUFFER_ATTRIBUTE_NAME;
					self.state = XT_HTML_IN_ATTRIBUTE_NAME;
				}
				break;
			// Collecting an attribute name. An attribute without '=' is stored
			// with whatever is in the value buffer at that point.
			case XT_HTML_IN_ATTRIBUTE_NAME:
				BUFFER_TAG_TEXT;
				if ([self.attributeNameCharSet characterIsMember:ch]) {
					BUFFER_ATTRIBUTE_NAME;
					// state = XT_HTML_IN_ATTRIBUTE_NAME;
				} else if ([self.whitespaceCharSet characterIsMember:ch]) {
					ADD_ATTRIBUTE;
					self.state = XT_HTML_AFTER_TAG_NAME;
				} else if (ch == '/' || ch == '\\') {
					ADD_ATTRIBUTE;
					self.state = XT_HTML_AT_CLOSING_SLASH;
				} else if (ch == '>') {
					ADD_ATTRIBUTE;
					ADD_TAG_ELEMENT;
					self.state = XT_HTML_INITIAL;
				} else if (ch == '=') {
					CLEAR_ATTRIBUTE_VALUE;
					self.state = XT_HTML_STARTING_ATTRIBUTE_VALUE;
				} else {
					// Unexpected character: logged, but kept as part of the attribute name.
					HANDLE_UNEXPECTED_CHAR_IN_TAG_ATTR;
					BUFFER_ATTRIBUTE_NAME;
					self.state = XT_HTML_IN_ATTRIBUTE_NAME;
				}
				break;
			// Directly after '=': decide between quoted, unquoted and empty value.
			case XT_HTML_STARTING_ATTRIBUTE_VALUE:
				BUFFER_TAG_TEXT;
				if (ch == '>') {
					ADD_ATTRIBUTE;
					ADD_TAG_ELEMENT;
					self.state = XT_HTML_INITIAL;
				} else if (ch == '/' || ch == '\\') {
					ADD_ATTRIBUTE;
					self.state = XT_HTML_AT_CLOSING_SLASH;
				} else if (ch == '"') {
					// Remember which quote opened the value; only the same quote closes it.
					self.state = XT_HTML_IN_QUOTED_ATTRIBUTE_VALUE;
					self.attributeQuoteChar = ch;
				} else if (ch == '\'') {
					self.state = XT_HTML_IN_QUOTED_ATTRIBUTE_VALUE;
					self.attributeQuoteChar = ch;
				} else if ([self.whitespaceCharSet characterIsMember:ch]) {
					ADD_ATTRIBUTE;
					self.state = XT_HTML_AFTER_TAG_NAME;
				} else if ([self isLegalUnquotedAttributeValueChar:ch]) {
					BUFFER_ATTRIBUTE_VALUE;
					self.state = XT_HTML_IN_UNQUOTED_ATTRIBUTE_VALUE;
				} else {
					// Illegal character: logged, but still taken as the start of an unquoted value.
					HANDLE_UNEXPECTED_CHAR_IN_TAG_ATTR;
					BUFFER_ATTRIBUTE_VALUE;
					self.state = XT_HTML_IN_UNQUOTED_ATTRIBUTE_VALUE;
				}
				break;
			// Collecting an unquoted value. '/' and '\' are kept as part of the
			// value here (the closing-slash branch is commented out).
			case XT_HTML_IN_UNQUOTED_ATTRIBUTE_VALUE:
				BUFFER_TAG_TEXT;
				if (ch == '>') {
					ADD_ATTRIBUTE;
					ADD_TAG_ELEMENT;
					self.state = XT_HTML_INITIAL;
				//} else if (ch == '/' || ch == '\\') {
				//	ADD_ATTRIBUTE;
				//	self.state = XT_HTML_AT_CLOSING_SLASH;
				} else if ([self.whitespaceCharSet characterIsMember:ch]) {
					ADD_ATTRIBUTE;
					self.state = XT_HTML_AFTER_TAG_NAME;
				} else if ([self isLegalUnquotedAttributeValueChar:ch]) {
					BUFFER_ATTRIBUTE_VALUE;
					//self.state = XT_HTML_IN_UNQUOTED_ATTRIBUTE_VALUE;
				} else {
					// Illegal character: logged, but kept as part of the value.
					HANDLE_UNEXPECTED_CHAR_IN_TAG_ATTR;
					BUFFER_ATTRIBUTE_VALUE;
					//self.state = XT_HTML_IN_UNQUOTED_ATTRIBUTE_VALUE;
				}
				break;
			// Collecting a quoted value.
			case XT_HTML_IN_QUOTED_ATTRIBUTE_VALUE:
				BUFFER_TAG_TEXT;
				if (ch == '>') {
					if (self.closing) {
						// In a closing tag, '>' ends the tag even though the quote is still open.
						ADD_ATTRIBUTE;
						ADD_TAG_ELEMENT;
						self.state = XT_HTML_INITIAL;
					} else {
						// In an opening tag, '>' inside quotes is logged and kept as part of the value.
						//TODO exp: ADD_ATTRIBUTE;
						BUFFER_ATTRIBUTE_VALUE;
						//TODO exp rm'd: ADD_TAG_ELEMENT;
						//TODO exp: HANDLE_START_OF_MISFORMED_TAG;
						HANDLE_UNEXPECTED_CHAR_IN_TAG_ATTR;
						//TODO exp: self.state = XT_HTML_IN_MISFORMED_TAG;
					}
				} else if (ch == '"' && self.attributeQuoteChar == ch) {
					ADD_ATTRIBUTE;
					self.state = XT_HTML_AFTER_TAG_NAME;
				} else if (ch == '\'' && self.attributeQuoteChar == ch) {
					ADD_ATTRIBUTE;
					self.state = XT_HTML_AFTER_TAG_NAME;
				} else if ([self isLegalQuotedAttributeValueChar:ch]) {
					BUFFER_ATTRIBUTE_VALUE;
					// state = IN_QUOTED_ATTRIBUTE_VALUE;
				} else {
					HANDLE_START_OF_MISFORMED_TAG;
					self.state = XT_HTML_IN_MISFORMED_TAG;
				}
				break;
			// After a '/' or '\' that followed the tag name or an attribute:
			// only whitespace and '>' are accepted.
			case XT_HTML_AT_CLOSING_SLASH:
				BUFFER_TAG_TEXT;
				if (ch == '>') {
					ADD_TAG_ELEMENT;
					self.state = XT_HTML_INITIAL;
				} else if ([self.whitespaceCharSet characterIsMember:ch]) {
					// state == XT_HTML_AT_CLOSING_SLASH;
				} else {
					HANDLE_START_OF_MISFORMED_TAG;
					self.state = XT_HTML_IN_MISFORMED_TAG;
				}
				break;
			// Misformed tag: swallow everything up to and including the next '>'.
			case XT_HTML_IN_MISFORMED_TAG:
				BUFFER_TAG_TEXT;
				if (ch == '>') {
					HANDLE_END_OF_MISFORMED_TAG;
					self.state = XT_HTML_INITIAL;
				}
				break;
			default:
				// This shouldn't happen - reset everything
				// (the current character is dropped)
				XT_ERROR_1(@"*** BUG! Unexpected state %d - reset all parser states and buffers", self.state);
				CLEAR_REGULAR_TEXT;
				CLEAR_WHITESPACE;
				CLEAR_TAG_TEXT;
				CLEAR_TAG_NAME;
				CLEAR_ATTRIBUTE_NAME;
				CLEAR_ATTRIBUTE_VALUE;
				CLEAR_ATTRIBUTES;
				self.state = XT_HTML_INITIAL;
				[self.charEntityParser reset];
				break;
		}
	}

	return res;
}

// Emits whatever complete whitespace or regular text is currently buffered and
// returns it as an array of elements (possibly empty).
//
// - Buffered whitespace is emitted and the state returns to XT_HTML_INITIAL.
// - Buffered regular text is emitted; the state stays XT_HTML_IN_TEXT and an
//   incomplete char entity is deliberately NOT emitted (see hardFlush).
// - A misformed tag is only logged, and a legal but incomplete tag is left
//   untouched: nothing is emitted for either (as per mjr's advice 2015-07-01).
- (NSArray *)flush
{
	XT_DEF_SELNAME;

	NSMutableArray *res = [NSMutableArray arrayWithCapacity:5];

	if (self.state == XT_HTML_IN_WHITESPACE) {
		ADD_WHITESPACE_ELEMENT;
		self.state = XT_HTML_INITIAL;
	} else if (self.state == XT_HTML_IN_TEXT) {
		ADD_TEXT_ELEMENT;
	} else if (self.state == XT_HTML_IN_MISFORMED_TAG) {
		XT_WARN_2(@"(state %d) flushing misformed HTML: \"%@\"", self.state, self.tagTextBuffer);
	}
	// XT_HTML_INITIAL and all in-tag states: nothing to do.

	return res;
}

// Like flush, but when the parser is outside a tag (in text or in the initial
// state) it also forces out any text still held by the char entity parser.
- (NSArray *)hardFlush
{
	NSArray *flushed = [self flush];
	NSMutableArray *res = [NSMutableArray arrayWithArray:flushed];

	BOOL outsideTag = (self.state == XT_HTML_IN_TEXT || self.state == XT_HTML_INITIAL);
	if (! outsideTag) {
		return res;
	}

	NSString *pendingEntityText = [self.charEntityParser hardFlush];
	if (pendingEntityText.length > 0) {
		[res addObject:pendingEntityText];
	}
	return res;
}

// Tells the caller whether flush should be called. For now this is
// unconditionally YES, regardless of what is buffered.
- (BOOL)needsFlushing
{
	//TODO refine later
	BOOL res = YES;
	return res;
}

// YES unless ch is one of the characters that may not appear in an unquoted
// attribute value (see illegalInUnuotedAttributeValueCharSet).
- (BOOL)isLegalUnquotedAttributeValueChar:(unichar)ch
{
	if ([self.illegalInUnuotedAttributeValueCharSet characterIsMember:ch]) {
		return NO;
	}
	return YES;
}

// Inside a quoted attribute value every character is legal except the quote
// character that opened the value.
- (BOOL)isLegalQuotedAttributeValueChar:(unichar)ch
{
	return (self.attributeQuoteChar != ch);
}

// Builds a tag object from the buffered tag name, closing flag and attributes.
//
// Returns an instance of the matching class from htmlTagDefs, an XTHtmlTagNoop
// for a tag listed in ignoredTagNames, or nil (after logging a warning) when
// the tag name is unknown.
- (XTHtmlTag *)makeTag
{
	XT_DEF_SELNAME;

	NSString *name = self.tagNameBuffer;

	Class knownClass = [self findTagClassForName:name];
	if (knownClass != nil) {
		XTHtmlTag *made = [[knownClass alloc] init];
		made.closing = self.closing;
		// Snapshot: self.attributes is cleared and reused for the next tag.
		made.attributes = [self.attributes copy];
		return made;
	}

	if ([self shouldIgnoreTagNamed:name]) {
		XT_TRACE_1(@"ignoring known tag \"%@\"", name);
		return [XTHtmlTagNoop new];
	}

	XT_WARN_1(@"unknown tag \"%@\"", name);
	return nil;
}

// Fills in the two lookup tables used by makeTag:
// - htmlTagDefs: tag classes that get a real tag object. findTagClassForName:
//   scans this array in order and returns the first class whose name matches.
// - ignoredTagNames: tag names that are accepted but turned into XTHtmlTagNoop.
- (void)initTagDefs
{
	_htmlTagDefs = @[
	  [XTHtmlTagP class],
	  [XTHtmlTagBr class],
	  [XTHtmlTagTab class],
	  [XTHtmlTagTitle class],
	  [XTHtmlTagDiv class],
	  [XTHtmlTagQ class],
	  [XTHtmlTagH1 class],
	  [XTHtmlTagH2 class],
	  [XTHtmlTagH3 class],
	  [XTHtmlTagH4 class],
	  [XTHtmlTagI class],
	  [XTHtmlTagEm class],
	  [XTHtmlTagA class],
	  [XTHtmlTagB class],
	  [XTHtmlTagU class],
	  [XTHtmlTagTt class],
	  [XTHtmlTagQuestionMarkT2 class],
	  [XTHtmlTagQuestionMarkT3 class],
	  [XTHtmlTagStrong class],
	  [XTHtmlTagCenter class],
	  [XTHtmlTagAboutBox class],
	  [XTHtmlTagOl class],
	  [XTHtmlTagUl class],
	  [XTHtmlTagLi class],
	  [XTHtmlTagHr class],
	  [XTHtmlTagBanner class],
	  [XTHtmlTagTable class],
	  [XTHtmlTagTr class],
	  [XTHtmlTagTh class],
	  [XTHtmlTagTd class],
	  [XTHtmlTagBlockQuote class],
	  [XTHtmlTagBq class],
	  [XTHtmlTagCite class],
	  [XTHtmlTagFont class],
	  [XTHtmlTagCode class],
	  [XTHtmlTagPre class],
	  [XTHtmlTagImg class],
	  [XTHtmlTagSmall class],
	  [XTHtmlTagBig class],
	  [XTHtmlTagPseudoTest class]
	];
	
	// The commented-out names below (tr, td, th) have tag classes in htmlTagDefs above.
	_ignoredTagNames = @[
	   @"sup",
	   @"sound",
	   @"body",
	   //@"tr",
	   //@"td",
	   @"tc",
	   //@"th",
	   @"nobr", //TODO handle
	   @"map",
	   @"area"
	   //TODO more?
	];
}

// Returns the first class in htmlTagDefs whose +name equals tagName, ignoring
// case, or nil when there is no such class.
//
// @param tagName The tag name to look up; nil or empty yields nil.
- (Class)findTagClassForName:(NSString *)tagName
{
	// Guard against a nil receiver below: messaging nil returns 0, which is
	// NSOrderedSame, so without this check a nil name would "match" the
	// first class in the table.
	if (tagName.length == 0) {
		return nil;
	}

	for (Class clazz in self.htmlTagDefs) {
		if ([tagName caseInsensitiveCompare:[clazz name]] == NSOrderedSame) {
			return clazz;
		}
	}
	return nil;
}

// Returns YES when tagName equals, ignoring case, one of the entries in
// ignoredTagNames.
//
// @param tagName The tag name to check; nil or empty yields NO.
- (BOOL)shouldIgnoreTagNamed:(NSString *)tagName
{
	// Guard against a nil receiver below: messaging nil returns 0, which is
	// NSOrderedSame, so without this check a nil name would be reported as
	// an ignored tag.
	if (tagName.length == 0) {
		return NO;
	}

	for (NSString *ignoredTagName in self.ignoredTagNames) {
		if ([tagName caseInsensitiveCompare:ignoredTagName] == NSOrderedSame) {
			return YES;
		}
	}
	return NO;
}

// Stores the buffered attribute in self.attributes, keyed by the lowercased
// attribute name, with the value run through the char entity parser.
// Afterwards the attribute name/value buffers are emptied.
//
// The shared char entity parser is reset both before and after, so whatever
// state it held on entry is discarded and nothing from the attribute value is
// left in it on exit.
- (void)addAttribute
{
	XTHtmlCharEntityParser *entityParser = self.charEntityParser;
	[entityParser reset];

	NSString *key = [[NSString stringWithString:self.attributeNameBuffer] lowercaseString];

	// parse: followed by hardFlush: the second call returns whatever the
	// parser was still holding back at the end of the value.
	NSString *rawValue = [NSString stringWithString:self.attributeValueBuffer];
	NSString *expandedHead = [entityParser parse:rawValue];
	NSString *expandedTail = [entityParser hardFlush];
	self.attributes[key] = [NSString stringWithFormat:@"%@%@", expandedHead, expandedTail];

	CLEAR_ATTRIBUTE_NAME;
	CLEAR_ATTRIBUTE_VALUE;
	[entityParser reset];
}

// Runs `string` through the char entity parser and appends the result to
// `array`, split into plain text runs and dedicated space elements:
//   0x15                        -> XTHtmlQuotedSpace
//   0xA0                        -> XTHtmlNonbreakingSpace
//   member of specialSpaceCharSet -> XTHtmlSpecialSpace
// Text between such characters is added as one string per run.
// Does nothing when `string` is nil or empty.
- (void)addRegularTextElement:(NSString *)string toArray:(NSMutableArray *)array
{
	if (string.length == 0) {
		return;
	}

	NSString *expanded = [self.charEntityParser parse:string];
	const NSUInteger count = expanded.length;
	NSMutableString *pendingText = [NSMutableString string];

	NSUInteger pos = 0;
	while (pos < count) {
		const unichar c = [expanded characterAtIndex:pos];
		pos += 1;

		id spaceElement = nil;
		switch (c) {
			case 0x15:
				spaceElement = [XTHtmlQuotedSpace quotedSpace];
				break;
			case 0xA0:
				spaceElement = [XTHtmlNonbreakingSpace new];
				break;
			default:
				if ([self.specialSpaceCharSet characterIsMember:c]) {
					spaceElement = [XTHtmlSpecialSpace specialSpaceWithChar:c];
				}
				break;
		}

		if (spaceElement == nil) {
			// Ordinary character: extend the current text run.
			[pendingText appendFormat:@"%C", c];
			continue;
		}

		// A space element ends the current text run (if any).
		if (pendingText.length > 0) {
			[array addObject:pendingText];
			pendingText = [NSMutableString string];
		}
		[array addObject:spaceElement];
	}

	if (pendingText.length > 0) {
		[array addObject:pendingText];
	}
}

@end
