Logo Search packages:      
Sourcecode: beagle version File versions  Download package

HtmlAgilityPack::HtmlDocument Class Reference

List of all members.


Detailed Description

Represents a complete HTML document.

Definition at line 367 of file HtmlDocument.cs.


Public Member Functions

HtmlAttribute CreateAttribute (string name, string value)
 Creates an HTML attribute with the specified name and value.
HtmlAttribute CreateAttribute (string name)
 Creates an HTML attribute with the specified name.
HtmlCommentNode CreateComment (string comment)
 Creates an HTML comment node with the specified comment text.
HtmlCommentNode CreateComment ()
 Creates an HTML comment node.
HtmlNode CreateElement (string name)
 Creates an HTML element node with the specified name.
XPathNavigator CreateNavigator ()
 Creates a new XPathNavigator object for navigating this HTML document.
HtmlTextNode CreateTextNode (string text)
 Creates an HTML text node with the specified text.
HtmlTextNode CreateTextNode ()
 Creates an HTML text node.
Encoding DetectEncoding (TextReader reader)
 Detects the encoding of an HTML text provided on a TextReader.
Encoding DetectEncoding (string path)
 Detects the encoding of an HTML file.
Encoding DetectEncoding (Stream stream)
 Detects the encoding of an HTML stream.
void DetectEncodingAndLoad (string path, bool detectEncoding)
 Detects the encoding of an HTML document from a file first, and then loads the file.
void DetectEncodingAndLoad (string path)
 Detects the encoding of an HTML document from a file first, and then loads the file.
Encoding DetectEncodingHtml (string html)
 Detects the encoding of an HTML text.
HtmlNode GetElementbyId (string id)
 Gets the HTML node with the specified 'id' attribute value.
 HtmlDocument ()
 Creates an instance of an HTML document.
void Load (TextReader reader)
 Loads the HTML document from the specified TextReader.
void Load (string path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
 Loads an HTML document from a file.
void Load (string path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
 Loads an HTML document from a file.
void Load (string path, Encoding encoding)
 Loads an HTML document from a file.
void Load (string path, bool detectEncodingFromByteOrderMarks)
 Loads an HTML document from a file.
void Load (string path)
 Loads an HTML document from a file.
void Load (Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int buffersize)
 Loads an HTML document from a stream.
void Load (Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
 Loads an HTML document from a stream.
void Load (Stream stream, Encoding encoding)
 Loads an HTML document from a stream.
void Load (Stream stream, bool detectEncodingFromByteOrderMarks)
 Loads an HTML document from a stream.
void Load (Stream stream)
 Loads an HTML document from a stream.
void LoadHtml (string html)
 Loads the HTML document from the specified string.
delegate bool NodeHandler (HtmlNode node)
void Save (XmlWriter writer)
 Saves the HTML document to the specified XmlWriter.
void Save (TextWriter writer)
 Saves the HTML document to the specified TextWriter.
void Save (StreamWriter writer)
 Saves the HTML document to the specified StreamWriter.
void Save (string filename, System.Text.Encoding encoding)
 Saves the HTML document to the specified file, using the specified encoding.
void Save (string filename)
 Saves the HTML document to the specified file.
void Save (Stream outStream, System.Text.Encoding encoding)
 Saves the HTML document to the specified stream.
void Save (Stream outStream)
 Saves the HTML document to the specified stream.

Static Public Member Functions

static string HtmlEncode (string html)
 Applies HTML encoding to a specified string.
static bool IsWhiteSpace (int c)
 Determines whether the specified character is considered a whitespace character.

Public Attributes

bool OptionAddDebuggingAttributes = false
 Adds Debugging attributes to node. Default is false.
bool OptionAutoCloseOnEnd = false
 Defines if closing for non closed nodes must be done at the end or directly in the document. Setting this to true can actually change how browsers render the page. Default is false.
bool OptionCheckSyntax = true
 Defines if non closed nodes will be checked at the end of parsing. Default is true.
bool OptionComputeChecksum = false
 Defines if a checksum must be computed for the document while parsing. Default is false.
System.Text.Encoding OptionDefaultStreamEncoding = Encoding.UTF8
 Defines the default stream encoding to use. Default is System.Text.Encoding.UTF8.
bool OptionExtractErrorSourceText = false
 Defines if source text must be extracted while parsing errors. If the document has a lot of errors, or cascading errors, parsing performance can be dramatically affected if set to true. Default is false.
int OptionExtractErrorSourceTextMaxLength = 100
 Defines the maximum length of the source text extracted for parse errors. Default is 100.
bool OptionFixNestedTags = false
 Defines if LI, TR, TH, TD tags must be partially fixed when nesting errors are detected. Default is false.
bool OptionOutputAsXml = false
 Defines if output must conform to XML, instead of HTML. Default is false.
bool OptionOutputOptimizeAttributeValues = false
 Defines if attribute value output must be optimized (not enclosed in double quotes when possible). Default is false.
bool OptionOutputUpperCase = false
 Defines if names must be output in uppercase. Default is false.
bool OptionReadEncoding = true
 Defines if declared encoding must be read from the document. Declared encoding is determined using the meta http-equiv="content-type" content="text/html;charset=XXXXX" html node. Default is true.
bool OptionUseIdAttribute = true
 Defines if the 'id' attribute must be specifically used. Default is true.
bool OptionWriteEmptyNodes = false
 Defines if empty nodes must be written as closed during output. Default is false.
NodeHandler ReportNode

Properties

int CheckSum [get]
 Gets the document CRC32 checksum if OptionComputeChecksum was set to true before parsing, 0 otherwise.
System.Text.Encoding DeclaredEncoding [get]
 Gets the document's declared encoding. Declared encoding is determined using the meta http-equiv="content-type" content="text/html;charset=XXXXX" html node.
HtmlNode DocumentNode [get]
 Gets the root node of the document.
System.Text.Encoding Encoding [get]
 Gets the document's output encoding.
ArrayList ParseErrors [get]
 Gets a list of parse errors found in the document.
System.Text.Encoding StreamEncoding [get]
 Gets the document's stream encoding.
bool StreamMode [get, set]

Private Types

enum  ParseState {
  Text, WhichTag, Tag, BetweenAttributes,
  EmptyTag, AttributeName, AttributeBeforeEquals, AttributeAfterEquals,
  AttributeValue, Comment, QuotedAttributeValue, ServerSideCode,
  PcDataQuote, PcData, Text, EntityStart,
  Text, Code
}

Private Member Functions

HtmlParseError AddError (HtmlParseErrorCode code, int line, int linePosition, int streamPosition, string sourceText, string reason)
void CloseCurrentNode ()
internal HtmlAttribute CreateAttribute ()
internal HtmlNode CreateNode (HtmlNodeType type, int index)
internal HtmlNode CreateNode (HtmlNodeType type)
string CurrentAttributeName ()
string CurrentAttributeValue ()
string CurrentNodeInner ()
string CurrentNodeName ()
string CurrentNodeOuter ()
void DecrementPosition ()
HtmlNode FindResetterNode (HtmlNode node, string name)
bool FindResetterNodes (HtmlNode node, string[] names)
void FixNestedTag (string name, string[] resetters)
void FixNestedTags ()
internal System.Text.Encoding GetOutEncoding ()
string[] GetResetters (string name)
internal HtmlNode GetXmlDeclaration ()
void IncrementPosition ()
bool NewCheck ()
void Parse ()
void PushAttributeNameEnd (int index)
void PushAttributeNameStart (int index)
void PushAttributeValueEnd (int index)
void PushAttributeValueStart (int index)
void PushNodeEnd (int index, bool close)
void PushNodeNameEnd (int index)
void PushNodeNameStart (bool starttag, int index)
void PushNodeStart (HtmlNodeType type, int index)
void ReadDocumentEncoding (HtmlNode node)
internal void SetIdForNode (HtmlNode node, string id)
internal void UpdateLastParentNode ()

Static Private Member Functions

static internal void Debug (string s)

Private Attributes

int _c
Crc32 _crc32 = null
HtmlAttribute _currentattribute
HtmlNode _currentnode
System.Text.Encoding _declaredencoding
HtmlNode _documentnode
bool _fullcomment
int _index
internal Hashtable _lastnodes = new Hashtable()
HtmlNode _lastparentnode
int _line
int _lineposition
int _maxlineposition
internal Hashtable _nodesid
ParseState _oldstate
bool _onlyDetectEncoding = false
internal Hashtable _openednodes
ArrayList _parseerrors = new ArrayList()
int _pcdata_quote_char = '\0'
ParseState _state
bool _stop_parsing = false
System.Text.Encoding _streamencoding
bool _streammode = false
internal StreamAsArray _text

Static Private Attributes

static bool _debug = false
static internal readonly string HtmlExceptionRefNotChild = "Reference node must be a child of this node"
static internal readonly string HtmlExceptionUseIdAttributeFalse = "You need to set UseIdAttribute property to true to enable this feature"

The documentation for this class was generated from the following file: HtmlDocument.cs

Generated by  Doxygen 1.6.0   Back to index