FeedNormalizer::RubyRssParser

Public Class Methods

parse(xml, loose) click to toggle source
# File lib/parsers/rss.rb, line 28
# Parses +xml+ with the class returned by +parser+ and, when the result looks
# like an RSS document, hands it to +package+.
#
# xml::   feed source, passed unchanged to <tt>parser.parse</tt>.
# loose:: forwarded unchanged to +package+.
#
# Returns whatever +package+ returns, or +nil+ when the parser raises a
# StandardError, yields nil/false, or yields an object without a +channel+
# method.
def self.parse(xml, loose)
  begin
    rss = parser.parse(xml)
  rescue StandardError
    # A document this parser cannot handle is an expected outcome, so it is
    # reported as nil. Only StandardError is rescued so that Interrupt,
    # SystemExit and other non-standard exceptions keep propagating instead
    # of being silently turned into "no feed".
    return nil
  end

  # check for channel to make sure we're only dealing with RSS.
  return nil unless rss && rss.respond_to?(:channel)

  package(rss, loose)
end
parser() click to toggle source
# File lib/parsers/rss.rb, line 24
# Returns the RSS::Parser constant. self.parse calls <tt>parse(xml)</tt> on
# whatever this method returns.
def self.parser
  RSS::Parser
end
priority() click to toggle source

Fairly high priority; a fast and strict parser.

# File lib/parsers/rss.rb, line 41
# Returns this parser's priority, the fixed value 100.
# NOTE(review): how priorities are compared is not visible here; presumably
# a higher value means the parser is tried earlier -- confirm with the caller.
def self.priority
  100
end

Protected Class Methods

package(rss, loose) click to toggle source
# File lib/parsers/rss.rb, line 47
# Builds a Feed from an already-parsed RSS object.
#
# rss::   parsed document; its +channel+, +image+ and +items+ are read here,
#         and +copyright+ is read when an item responds to +copyright+.
# loose:: when truthy every item category is kept, otherwise only the first.
#
# Returns the populated Feed.
def self.package(rss, loose)
  normalized = Feed.new(self)

  # Feed attribute => RSS element (or candidate elements) handed to
  # map_functions!.
  channel_fields = {
    :generator => :generator,
    :title => :title,
    :urls => :link,
    :description => :description,
    :copyright => :copyright,
    :authors => :managingEditor,
    :last_updated => [:lastBuildDate, :pubDate, :dc_date],
    :id => :guid,
    :ttl => :ttl
  }

  # Entry attribute => item element (or candidate elements).
  entry_fields = {
    :date_published => [:pubDate, :dc_date],
    :urls => :link,
    :enclosures => :enclosure,
    :description => :description,
    :content => [:content_encoded, :description],
    :title => :title,
    :authors => [:author, :dc_creator],
    # For this parser last_updated is effectively an alias of date_published.
    :last_updated => [:pubDate, :dc_date]
  }

  # The same mapping is applied to the document root and then to its channel,
  # so an element present on either one is seen.
  map_functions!(channel_fields, rss, normalized)
  map_functions!(channel_fields, rss.channel, normalized)

  # Channel values that need more than a plain element lookup.
  image = rss.image
  normalized.image = (image.url if image)
  normalized.skip_hours = skip(rss, :skipHours)
  normalized.skip_days = skip(rss, :skipDays)

  rss.items.each do |item|
    entry = Entry.new
    map_functions!(entry_fields, item, entry)

    # Item values that need more than a plain element lookup.
    if item.respond_to?(:guid) && (guid = item.guid)
      entry.id = guid.content
    end

    # NOTE(review): the guard inspects the item but the value is read from
    # the feed-level object; kept exactly as found -- confirm the intent.
    if item.respond_to?(:copyright)
      entry.copyright = rss.copyright
    end

    # Any StandardError while reading categories (no categories method, an
    # empty list in strict mode, ...) leaves the entry with an empty list.
    entry.categories =
      begin
        if loose
          item.categories.map { |category| category.content }
        else
          [item.categories.first.content]
        end
      rescue StandardError
        []
      end

    normalized.entries << entry
  end

  normalized
end
skip(parser, attribute) click to toggle source
# File lib/parsers/rss.rb, line 101
# Collects the +content+ of every entry under a channel's skipHours or
# skipDays element.
#
# parser::    object whose +channel+ is inspected.
# attribute:: :skipHours or :skipDays.
#
# Returns an array of contents, or +nil+ when the channel does not respond
# to +attribute+ or returns nil/false for it.
def self.skip(parser, attribute)
  # Name of the collection to read from the skip element; nil for any other
  # attribute.
  collection_name = { :skipHours => :hours, :skipDays => :days }[attribute]
  channel = parser.channel
  return nil unless channel.respond_to?(attribute)

  skip_element = channel.send(attribute)
  return nil unless skip_element

  skip_element.send(collection_name).map { |element| element.content }
end

[Validate]

Generated with the Darkfish Rdoc Generator 2.