Module | Hpricot::Traverse |
In: |
lib/hpricot/elements.rb
lib/hpricot/modules.rb lib/hpricot/traverse.rb |
# File lib/hpricot/elements.rb, line 375
# Defines a method named "filter[<tok>]" whose body is +blk+.
# A String +tok+ is used verbatim in the method name; any other object is
# rendered with #inspect first.
def self.filter(tok, &blk)
  label = tok.is_a?(String) ? tok : tok.inspect
  define_method("filter[#{label}]", &blk)
end
Find the first matching node for the CSS or XPath expr string.
# File lib/hpricot/traverse.rb, line 341
# Runs #search with +expr+ and returns the first element of the result.
def at(expr)
  matches = search(expr)
  matches.first
end
Is this object a stranded end tag?
# File lib/hpricot/traverse.rb, line 21
# Reports whether BogusETag::Trav === self, i.e. whether this object is a
# stranded end tag.
def bogusetag?
  BogusETag::Trav === self
end
Find children of a given tag_name.
ele.children_of_type('p') #=> [...array of paragraphs...]
# File lib/hpricot/traverse.rb, line 390
# Returns the children whose +pathname+ equals +tag_name+.
#
# Children that do not respond to +pathname+ are skipped. Returns nil when
# this object has no +children+ method or when +children+ is nil, so a
# childless node no longer raises NoMethodError.
def children_of_type(tag_name)
  return unless respond_to?(:children) && children

  children.find_all do |x|
    x.respond_to?(:pathname) && x.pathname == tag_name
  end
end
Builds a unique CSS string for this node, from the root of the document containing it.
# File lib/hpricot/traverse.rb, line 226
# Builds a CSS selector string for this node.
#
# An element with an id attribute yields "#<id>". Otherwise the parent's
# css_path is joined to this node's pathname with " > ", and ":nth(k)" is
# appended when two or more siblings share that pathname (k is the number
# of same-named siblings that come before this node).
def css_path
  return "##{get_attribute('id')}" if elem? && has_attribute?('id')

  same_name_count = 0
  own_slot = 0
  if parent.children
    parent.children.each do |sibling|
      # Record how many same-named siblings precede this node.
      own_slot = same_name_count if sibling == self
      same_name_count += 1 if sibling.pathname == self.pathname
    end
  end

  parent_path = parent.css_path
  path = parent_path ? "#{parent_path} > #{self.pathname}" : self.pathname
  path += ":nth(#{own_slot})" if same_name_count >= 2
  path
end
Is this object the enclosing HTML or XML document?
# File lib/hpricot/traverse.rb, line 7
# Reports whether Doc::Trav === self, i.e. whether this object is the
# enclosing HTML or XML document.
def doc?
  Doc::Trav === self
end
Is this object a doctype tag?
# File lib/hpricot/traverse.rb, line 15
# Reports whether DocType::Trav === self, i.e. whether this object is a
# doctype tag.
def doctype?
  DocType::Trav === self
end
Is this object an HTML or XML element?
# File lib/hpricot/traverse.rb, line 9
# Reports whether Elem::Trav === self, i.e. whether this object is an HTML
# or XML element.
def elem?
  Elem::Trav === self
end
Find all nodes which follow the current one.
# File lib/hpricot/traverse.rb, line 114
# Returns an Elements collection holding every sibling that comes after
# this node in its parent's children.
def following
  siblings = parent.children
  first_after = siblings.index(self) + 1
  Elements[*siblings[first_after...siblings.length]]
end
# File lib/hpricot/traverse.rb, line 138
# Walks down from this node by calling get_subnode_internal once per entry
# in +indexes+, feeding each result into the next call. With no indexes the
# node itself is returned.
def get_subnode(*indexes)
  indexes.inject(self) { |node, index| node.get_subnode_internal(index) }
end
Builds an HTML string from the contents of this node.
# File lib/hpricot/traverse.rb, line 168
# Reads or replaces the contents of this node.
#
# With neither +inner+ nor a block, returns the concatenated output("") of
# every child, or "" when there are no children.
# With +inner+ or a block, marks the node as altered, replaces the children
# (an Array is used as-is; anything else goes through #make) and reparents
# the new children.
def html(inner = nil, &blk)
  unless inner || blk
    return "" unless respond_to?(:children) && children

    return children.map { |child| child.output("") }.join
  end

  altered!
  self.children = Array === inner ? inner : make(inner, &blk)
  reparent self.children
end
# File lib/hpricot/traverse.rb, line 47
# Returns the position of the first child matching +name+.
#
# "*" always yields 0. Otherwise a child matches when its +name+ equals
# +name+, or when it is a text node and +name+ is "text()". Returns -1 when
# nothing matches or there are no children.
def index(name)
  return 0 if name == "*"

  if children
    children.each_with_index do |child, offset|
      named = child.respond_to?(:name) && name == child.name
      return offset if named || (child.text? && name == "text()")
    end
  end
  -1
end
Inserts new contents into the current node, based on the HTML contained in string inner.
# File lib/hpricot/traverse.rb, line 191
# Replaces this node's contents by delegating to #html. A nil or false
# +inner+ is passed on as an empty Array.
def inner_html=(inner)
  replacement = inner || []
  html(replacement)
end
Builds a string from the text contained in this node. All HTML elements are removed.
# File lib/hpricot/traverse.rb, line 158
# Concatenates the inner_text of every child. Returns "" when this object
# has no children.
def inner_text
  return "" unless respond_to?(:children) && children

  children.map { |child| child.inner_text }.join
end
Parses an HTML string, making an HTML fragment based on the options used to create the container document.
# File lib/hpricot/traverse.rb, line 25
# Parses +input+ (or the block) into nodes. The call is forwarded to the
# parent when there is one that responds to +make+; otherwise Hpricot.make
# is used and its children are returned.
def make(input = nil, &blk)
  return parent.make(input, &blk) if parent && parent.respond_to?(:make)

  Hpricot.make(input, &blk).children
end
Returns the node neighboring this node to the south: just below it. This method includes text nodes and comments and such.
# File lib/hpricot/traverse.rb, line 91
# Returns the sibling immediately after this node in its parent's children,
# or nil when this node is the last child or has no parent.
def next
  # Check the parent before touching parent.children; the original guard ran
  # only after the dereference and so could never prevent the NoMethodError.
  return unless parent

  sib = parent.children
  sib[sib.index(self) + 1]
end
Puts together an array of neighboring nodes based on their proximity to this node. So, for example, to get the next node, you could use nodes_at(1). Or, to get the previous node, use nodes_at(-1).
This method also accepts ranges and sets of numbers.
ele.nodes_at(-3..-1, 1..3) # gets three nodes before and three after ele.nodes_at(1, 5, 7) # gets three nodes at offsets below the current node ele.nodes_at(0, 5..6) # the current node and two others
# File lib/hpricot/traverse.rb, line 67
# Collects the siblings of this node found at the given offsets.
#
# Each entry in +pos+ is matched with === against (sibling index - own
# index), so both Integers and Ranges work; 0 selects this node itself.
# A Range whose begin is a String has its endpoints resolved through
# parent.index and converted into offsets relative to this node.
# Returns an Elements collection in sibling order.
def nodes_at(*pos)
  sib = parent.children
  si = sib.index(self)

  offsets = pos.map do |r|
    if r.is_a?(Range) && r.begin.is_a?(String)
      Range.new(parent.index(r.begin) - si, parent.index(r.end) - si, r.exclude_end?)
    else
      r
    end
  end

  # The stray debug print of the offsets was removed; it wrote to stdout on
  # every call.
  Elements[*sib.select.with_index { |_node, i| offsets.any? { |offset| offset === i - si } }]
end
# File lib/hpricot/traverse.rb, line 246
# Returns this node's index among the parent's children that share its
# pathname.
def position
  same_type = parent.children_of_type(self.pathname)
  same_type.index(self)
end
Returns the node neighboring this node to the north: just above it. This method includes text nodes and comments and such.
# File lib/hpricot/traverse.rb, line 99
# Returns the sibling immediately before this node in its parent's
# children, or nil when this node is the first child, has no parent, or the
# parent has no children list.
def previous
  # Guard before dereferencing; the original checked +sib+ only after
  # calling sib.index, and never checked the parent at all.
  return unless parent

  sib = parent.children
  return unless sib

  x = sib.index(self) - 1
  # A negative index would wrap around to the end of the array.
  sib[x] if x >= 0
end
Is this object an XML processing instruction?
# File lib/hpricot/traverse.rb, line 17
# Reports whether ProcIns::Trav === self, i.e. whether this object is an
# XML processing instruction.
def procins?
  ProcIns::Trav === self
end
Searches this node for all elements matching the CSS or XPath expr. Returns an Elements array containing the matching nodes. If blk is given, it is used to iterate through the matching set.
# File lib/hpricot/traverse.rb, line 254 def search(expr, &blk) if Range === expr return Elements.expand(at(expr.begin), at(expr.end), expr.exclude_end?) end last = nil nodes = [self] done = [] expr = expr.to_s hist = [] until expr.empty? expr = clean_path(expr) expr.gsub!(%r!^//!, '') case expr when %r!^/?\.\.! last = expr = $' nodes.map! { |node| node.parent } when %r!^[>/]\s*! last = expr = $' nodes = Elements[*nodes.map { |node| node.children if node.respond_to? :children }.flatten.compact] when %r!^\+! last = expr = $' nodes.map! do |node| siblings = node.parent.children siblings[siblings.index(node)+1] end nodes.compact! when %r!^~! last = expr = $' nodes.map! do |node| siblings = node.parent.children siblings[(siblings.index(node)+1)..-1] end nodes.flatten! when %r!^[|,]! last = expr = " #$'" nodes.shift if nodes.first == self done += nodes nodes = [self] else m = expr.match(%r!^([#.]?)([a-z0-9\\*_-]*)!i).to_a after = $' mt = after[%r!:[a-z0-9\\*_-]+!i, 0] oop = false if mt and not (mt == ":not" or Traverse.method_defined? "filter[#{mt}]") after = $' m[2] += mt expr = after end if m[1] == '#' oid = get_element_by_id(m[2]) nodes = oid ? [oid] : [] expr = after else m[2] = "*" if after =~ /^\(\)/ || m[2] == "" || m[1] == "." ret = [] nodes.each do |node| case m[2] when '*' node.traverse_element { |n| ret << n } else if node.respond_to? :get_elements_by_tag_name ret += [*node.get_elements_by_tag_name(m[2])] - [*(node unless last)] end end end nodes = ret end last = nil end hist << expr break if hist[-1] == hist[-2] nodes, expr = Elements.filter(nodes, expr) end nodes = done + nodes.flatten.uniq if blk nodes.each(&blk) self else Elements[*nodes] end end
Is this object an HTML text node?
# File lib/hpricot/traverse.rb, line 11
# Reports whether Text::Trav === self, i.e. whether this object is an HTML
# text node.
def text?
  Text::Trav === self
end
Builds an HTML string from this node and its contents. If you need to write to a stream, try calling output(io) as a method on this object.
# File lib/hpricot/traverse.rb, line 36
# Returns the result of calling #output with an empty string.
def to_html
  output("")
end
Attempts to preserve the original HTML of the document, only outputting new tags for elements which have changed.
# File lib/hpricot/traverse.rb, line 43
# Returns the result of calling #output with an empty string and the
# :preserve option set to true.
def to_original_html
  output("", :preserve => true)
end
Builds a string from the text contained in this node. All HTML elements are removed.
# File lib/hpricot/traverse.rb, line 148
# Concatenates the to_plain_text of every child, strips surrounding
# whitespace and collapses runs of two or more newlines into exactly two.
# Returns "" when this object has no children.
def to_plain_text
  return "" unless respond_to?(:children) && children

  joined = children.map { |child| child.to_plain_text }.join
  joined.strip.gsub(/\n{2,}/, "\n\n")
end
traverse_element traverses elements in the tree. It yields elements in depth first order.
If names are empty, it yields all elements. If non-empty names are given, they should be a list of universal names.
A nested element is yielded in depth first order as follows.
t = Hpricot('<a id=0><b><a id=1 /></b><c id=2 /></a>') t.traverse_element("a", "c") {|e| p e} # => {elem <a id="0"> {elem <b> {emptyelem <a id="1">} </b>} {emptyelem <c id="2">} </a>} {emptyelem <a id="1">} {emptyelem <c id="2">}
Universal names are specified as follows.
t = Hpricot(<<'End') <html> <meta name="robots" content="index,nofollow"> <meta name="author" content="Who am I?"> </html> End t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e} # => {emptyelem <{http://www.w3.org/1999/xhtml}meta name="robots" content="index,nofollow">} {emptyelem <{http://www.w3.org/1999/xhtml}meta name="author" content="Who am I?">}
# File lib/hpricot/traverse.rb, line 374
# Yields elements of the tree to +block+.
#
# With no +names+, delegates to traverse_all_element. Otherwise the names
# are collected into a Hash (name => true) and passed to
# traverse_some_element. Always returns nil.
def traverse_element(*names, &block) # :yields: element
  if names.empty?
    traverse_all_element(&block)
    return nil
  end

  wanted = names.each_with_object({}) { |name, set| set[name] = true }
  traverse_some_element(wanted, &block)
  nil
end
traverse_text traverses texts in the tree
# File lib/hpricot/traverse.rb, line 680
# Passes +block+ to traverse_text_internal and always returns nil.
def traverse_text(&block) # :yields: text
  traverse_text_internal(&block)
  nil
end
Is this object an XML declaration?
# File lib/hpricot/traverse.rb, line 13
# Reports whether XMLDecl::Trav === self, i.e. whether this object is an
# XML declaration.
def xmldecl?
  XMLDecl::Trav === self
end
Builds a unique XPath string for this node, from the root of the document containing it.
# File lib/hpricot/traverse.rb, line 209
# Builds an XPath string for this node.
#
# An element with an id attribute yields "//name[@id='...']". Otherwise the
# parent's xpath is joined to this node's pathname with File.join, and a
# 1-based "[k]" index is appended when two or more siblings share that
# pathname.
def xpath
  if elem? && has_attribute?('id')
    return "//#{self.name}[@id='#{get_attribute('id')}']"
  end

  same_name_count = 0
  own_slot = 0
  if parent.children
    parent.children.each do |sibling|
      # Record how many same-named siblings precede this node.
      own_slot = same_name_count if sibling == self
      same_name_count += 1 if sibling.pathname == self.pathname
    end
  end

  path = File.join(parent.xpath, self.pathname)
  path += "[#{own_slot + 1}]" if same_name_count >= 2
  path
end