xmlwrapp
xmlwrapp/document.h
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2001-2003 Peter J Jones (pjones@pmade.org)
00003  * All Rights Reserved
00004  * 
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  * 
00009  * 1. Redistributions of source code must retain the above copyright
00010  *    notice, this list of conditions and the following disclaimer.
00011  * 2. Redistributions in binary form must reproduce the above copyright
00012  *    notice, this list of conditions and the following disclaimer in
00013  *    the documentation and/or other materials provided with the
00014  *    distribution.
00015  * 3. Neither the name of the Author nor the names of its contributors
00016  *    may be used to endorse or promote products derived from this software
00017  *    without specific prior written permission.
00018  * 
00019  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
00020  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
00021  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
00022  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR
00023  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00025  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00026  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
00027  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00028  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00029  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00030  * SUCH DAMAGE.
00031  */
00032 
00033 /**
00034     @file
00035 
00036     This file contains the definition of the xml::document class.
00037  */
00038 
00039 #ifndef _xmlwrapp_document_h_
00040 #define _xmlwrapp_document_h_
00041 
00042 // xmlwrapp includes
00043 #include "xmlwrapp/init.h"
00044 #include "xmlwrapp/node.h"
00045 
00046 // standard includes
00047 #include <iosfwd>
00048 #include <string>
00049 #include <cstddef>
00050 
00051 // forward declarations of the xslt types that xml::document refers to
00052 namespace xslt
00053 {
00054 
00055 class stylesheet; // named as a friend of xml::document further down
00056 namespace impl
00057 {
00058 class result; // only used through a pointer parameter of xml::document
00059 }
00060 
00061 } // end xslt namespace
00062 
00063 namespace xml
00064 {
00065 
00066 // forward declarations of types that xml::document names but does not define
00067 class tree_parser; // named as a friend of xml::document
00068 
00069 namespace impl
00070 {
00071 struct doc_impl; // pointee type of the private xml::document::pimpl_ member
00072 }
00073 
00074 /**
00075     The xml::document class holds an XML tree together with document-level
00076     information about it, such as the version, encoding and standalone flag.
00077  */
00078 class document
00079 {
00080 public:
00081     /// Unsigned type used for child node counts (see size()).
00082     typedef std::size_t size_type;
00083 
00084     /**
00085         Create a new XML document with the default settings. The new document
00086         will contain a root node with a name of "blank".
00087      */
00088     document();
00089 
00090     /**
00091         Create a new XML document and set the name of the root element to the
00092         given text.
00093 
00094         @param root_name The name to give to the root element.
00095      */
00096     explicit document(const char *root_name);
00097 
00098     /**
00099         Create a new XML document and set the root node.
00100 
00101         @param n The node to use as the root node. n will be copied.
00102      */
00103     explicit document(const node& n);
00104 
00105     /**
00106         Copy construct a new XML document. The new document will be an exact
00107         copy of the original.
00108 
00109         @param other The other document object to copy from.
00110      */
00111     document(const document& other);
00112 
00113     /**
00114         Copy another document object into this one using the assignment
00115         operator. This document object will be an exact copy of the other
00116         document after the assignment.
00117 
00118         @param other The document to copy from.
00119         @return *this.
00120      */
00121     document& operator=(const document& other);
00122 
00123     /**
00124         Swap this xml::document object with another one.
00125 
00126         @param other The other document to swap with.
00127      */
00128     void swap(document& other);
00129 
00130     /**
00131         Clean up after an XML document object.
00132      */
00133     ~document();
00134 
00135     /**
00136         Get a reference to the root node of this document. If no root node
00137         has been set, the returned node will be a blank node. Take care to
00138         bind the result to a reference so that you don't copy the whole node
00139         tree!
00140 
00141         @return A const reference to the root node.
00142      */
00143     const node& get_root_node() const;
00144 
00145     /**
00146         Get a reference to the root node of this document. If no root node
00147         has been set, the returned node will be a blank node. Take care to
00148         bind the result to a reference so that you don't copy the whole node
00149         tree!
00150 
00151         @return A reference to the root node.
00152      */
00153     node& get_root_node();
00154 
00155     /**
00156         Set the root node to the given node. A full copy is made and stored
00157         in the document object.
00158 
00159         @param n The new root node to use.
00160      */
00161     void set_root_node(const node& n);
00162 
00163     /**
00164         Get the XML version for this document. For generated documents, the
00165         version will be the default. For parsed documents, this will be the
00166         version from the XML declaration.
00167 
00168         @return The XML version string for this document.
00169      */
00170     const std::string& get_version() const;
00171 
00172     /**
00173         Set the XML version number for this document. This version string
00174         will be used when generating the XML output.
00175 
00176         @param version The version string to use, like "1.0".
00177      */
00178     void set_version(const char *version);
00179 
00180     /**
00181         Get the XML encoding for this document. The default encoding is
00182         ISO-8859-1.
00183 
00184         @return The encoding string.
00185      */
00186     const std::string& get_encoding() const;
00187 
00188     /**
00189         Set the XML encoding string. If you don't set this, it will default
00190         to ISO-8859-1.
00191 
00192         @param encoding The XML encoding to use.
00193      */
00194     void set_encoding(const char *encoding);
00195 
00196     /**
00197         Find out if the current document is a standalone document. For
00198         generated documents, this will be the default. For parsed documents
00199         this will be set based on the XML declaration.
00200 
00201         @return True if this document is standalone.
00202         @return False if this document is not standalone.
00203      */
00204     bool get_is_standalone() const;
00205 
00206     /**
00207         Set the standalone flag. This will show up in the XML declaration
00208         of the generated XML output.
00209 
00210         @param sa What to set the standalone flag to.
00211      */
00212     void set_is_standalone(bool sa);
00213 
00214     /**
00215         Walk through the document and expand <xi:include> elements. For more
00216         information, please see the W3C recommendation for XInclude.
00217         http://www.w3.org/2001/XInclude.
00218 
00219         The return value of this function may change to int after a bug has
00220         been fixed in libxml2 (xmlXIncludeDoProcess).
00221 
00222         @return False if there was an error with substitutions.
00223         @return True if there were no errors (with or without substitutions).
00224      */
00225     bool process_xinclude();
00226 
00227     /**
00228         Test to see if this document has an internal subset. That is, DTD
00229         data that is declared within the XML document itself.
00230 
00231         @return True if this document has an internal subset.
00232         @return False otherwise.
00233      */
00234     bool has_internal_subset() const;
00235 
00236     /**
00237         Test to see if this document has an external subset. That is, it
00238         references a DTD from an external source, such as a file or URL.
00239 
00240         @return True if this document has an external subset.
00241         @return False otherwise.
00242      */
00243     bool has_external_subset() const;
00244 
00245     /**
00246         Validate this document against the DTD that has been attached to it.
00247         This would happen at parse time if there was a !DOCTYPE definition.
00248         If the DTD is valid, and the document is valid, this member function
00249         will return true.
00250 
00251         If it returns false, you may want to send the document through
00252         xmllint to get the actual error messages.
00253 
00254         @return True if the document is valid.
00255         @return False if there was a problem with the DTD or XML doc.
00256      */
00257     bool validate();
00258 
00259     /**
00260         Parse the given DTD and try to validate this document against it. If
00261         the DTD is valid, and the document is valid, this member function
00262         will return true.
00263 
00264         If it returns false, you may want to send the document through
00265         xmllint to get the actual error messages.
00266 
00267         This member function will add the parsed DTD to this document as the
00268         external subset after the validation. If there is already an external
00269         DTD attached to this document it will be removed and deleted.
00270 
00271         @param dtdname A filename or URL for the DTD to use.
00272         @return True if the document is valid.
00273         @return False if there was a problem with the DTD or XML doc.
00274      */
00275     bool validate(const char *dtdname);
00276 
00277     /**
00278         Returns the number of child nodes of this document. This will always
00279         be at least one, since all xmlwrapp documents must have a root node.
00280         This member function is useful to find out how many document children
00281         there are, including processing instructions, comments, etc.
00282 
00283         @return The number of child nodes that this document has.
00284      */
00285     size_type size() const;
00286 
00287     /**
00288         Get an iterator to the first child node of this document. If what you
00289         really wanted was the root node (the first element) you should use
00290         the get_root_node() member function instead.
00291 
00292         @return An xml::node::iterator that points to the first child node.
00293         @return An end iterator if there are no children in this document.
00294      */
00295     node::iterator begin();
00296 
00297     /**
00298         Get a const_iterator to the first child node of this document. If
00299         what you really wanted was the root node (the first element) you
00300         should use the get_root_node() member function instead.
00301 
00302         @return An xml::node::const_iterator that points to the first child node.
00303         @return An end const_iterator if there are no children in this document.
00304      */
00305     node::const_iterator begin() const;
00306 
00307     /**
00308         Get an iterator that points one past the last child node for this
00309         document.
00310 
00311         @return An end xml::node::iterator.
00312      */
00313     node::iterator end();
00314 
00315     /**
00316         Get a const_iterator that points one past the last child node for
00317         this document.
00318 
00319         @return An end xml::node::const_iterator.
00320      */
00321     node::const_iterator end() const;
00322 
00323     /**
00324         Add a child xml::node to this document. You should not add an element
00325         type node, since there can only be one root node. This member
00326         function is only useful for adding processing instructions, comments,
00327         etc. If you do try to add a node of type element, an exception will
00328         be thrown.
00329 
00330         @param child The child xml::node to add.
00331      */
00332     void push_back (const node &child);
00333 
00334     /**
00335         Insert a new child node. The new node will be inserted at the end of
00336         the child list. This is similar to the xml::node::push_back member
00337         function except that an iterator to the inserted node is returned.
00338 
00339         The rules from the push_back member function apply here. Don't add a
00340         node of type element.
00341 
00342         @param n The node to insert as a child of this document.
00343         @return An iterator that points to the newly inserted node.
00344         @see xml::document::push_back
00345      */
00346     node::iterator insert (const node &n);
00347 
00348     /**
00349         Insert a new child node. The new node will be inserted before the
00350         node pointed to by the given iterator.
00351 
00352         The rules from the push_back member function apply here. Don't add a
00353         node of type element.
00354 
00355         @param position An iterator that points to the location where the new node should be inserted (before it).
00356         @param n The node to insert as a child of this document.
00357         @return An iterator that points to the newly inserted node.
00358         @see xml::document::push_back
00359      */
00360     node::iterator insert(node::iterator position, const node &n);
00361 
00362     /**
00363         Replace the node pointed to by the given iterator with another node.
00364         The old node will be removed, including all its children, and
00365         replaced with the new node. This will invalidate any iterators that
00366         point to the node to be replaced, or any pointers or references to
00367         that node.
00368 
00369         Do not replace the root node with this member function. The same
00370         rules that apply to push_back apply here. If you try to replace a
00371         node of type element, an exception will be thrown.
00372 
00373         @param old_node An iterator that points to the node that should be removed.
00374         @param new_node The node to put in old_node's place.
00375         @return An iterator that points to the new node.
00376         @see xml::document::push_back
00377      */
00378     node::iterator replace(node::iterator old_node, const node& new_node);
00379 
00380     /**
00381         Erase the node that is pointed to by the given iterator. The node
00382         and all its children will be removed from this document. This will
00383         invalidate any iterators that point to the node to be erased, or any
00384         pointers or references to that node.
00385 
00386         Do not remove the root node using this member function. The same
00387         rules that apply to push_back apply here. If you try to erase the
00388         root node, an exception will be thrown.
00389 
00390         @param to_erase An iterator that points to the node to be erased.
00391         @return An iterator that points to the node after the one being erased.
00392         @see xml::document::push_back
00393      */
00394     node::iterator erase(node::iterator to_erase);
00395 
00396     /**
00397         Erase all nodes in the given range, from first to last. This will
00398         invalidate any iterators that point to the nodes to be erased, or any
00399         pointers or references to those nodes.
00400 
00401         Do not remove the root node using this member function. The same
00402         rules that apply to push_back apply here. If you try to erase the
00403         root node, an exception will be thrown.
00404 
00405         @param first The first node in the range to be removed.
00406         @param last An iterator that points one past the last node to erase. Think xml::node::end().
00407         @return An iterator that points to the node after the last one being erased.
00408         @see xml::document::push_back
00409      */
00410     node::iterator erase(node::iterator first, node::iterator last);
00411 
00412     /**
00413         Convert the XML document tree into XML text data and place it into
00414         the given string.
00415 
00416         @param s The string to place the XML text data into.
00417      */
00418     void save_to_string(std::string& s) const;
00419 
00420     /**
00421         Convert the XML document tree into XML text data and place it into
00422         the given filename.
00423 
00424         @param filename The name of the file to place the XML text data into.
00425         @param compression_level 0 is no compression, 1-9 allowed, where 1 is
00426                                  for better speed, and 9 is for smaller size
00427         @return True if the data was saved successfully.
00428         @return False otherwise.
00429      */
00430     bool save_to_file(const char *filename, int compression_level = 0) const;
00431 
00432     /**
00433         Convert the XML document tree into XML text data and then insert it
00434         into the given stream.
00435 
00436         @param stream The stream to insert the XML into.
00437         @param doc The document to insert.
00438         @return The stream from the first parameter.
00439      */
00440     friend std::ostream& operator<< (std::ostream &stream, const document &doc);
00441 
00442 private:
00443     impl::doc_impl *pimpl_; // opaque implementation object; doc_impl is only forward-declared in this header
00444     // NOTE(review): the void* hooks below are private, so only the friends can call them; confirm their semantics in the implementation
00445     void set_doc_data (void *data);
00446     void set_doc_data_from_xslt (void *data, xslt::impl::result *xr);
00447     void* get_doc_data();
00448     void* get_doc_data_read_only() const;
00449     void* release_doc_data();
00450     // friendship gives these classes access to the private members above
00451     friend class tree_parser;
00452     friend class xslt::stylesheet;
00453 };
00454 
00455 } // namespace xml
00456 
00457 #endif // _xmlwrapp_document_h_