Class | Yapra::Plugin::Feed::Custom |
In: |
lib-plugins/yapra/plugin/feed/custom.rb
|
Parent: | Yapra::Plugin::MechanizeBase |
Generates an RSS feed from a web page.
example:
- module: Feed::Custom
  config:
    url: 'http://www.fraction.jp/'
    extract_xpath:
      capture: '//div'
      split: '//div[@class="test"]'
      description: '//div'
      link: '//li[2]'
      title: '//p'
    apply_template_after_extracted:
      content_encoded: '<div><%= title %></div>'
# File lib-plugins/yapra/plugin/feed/custom.rb, line 22
#
# Fetches every configured URL, splits each page into elements and appends
# one RSS::RDF::Item per element to +data+.
#
# Configuration keys read here:
#   config['url']                      - a single URL or an Array of URLs
#   config['wait']                     - argument passed to sleep after each
#                                        page (defaults to 1)
#   config['extract_xpath']['capture'] - optional expression; when set, only
#                                        the first node it matches is searched
#   config['extract_xpath']['split']   - expression whose matches each become
#                                        one item
#
# A page on which +capture+ matches nothing is logged and skipped instead of
# raising NoMethodError on nil, so the remaining URLs are still processed.
#
# data - collection that receives the items via <<.
#
# Returns +data+.
def run(data)
  urls =
    if config['url'].kind_of?(Array)
      config['url']
    else
      [config['url']]
    end
  xconfig = config['extract_xpath']
  wait    = config['wait'] || 1

  # Hash#delete returns the removed value, so this both reads the two
  # control keys and strips them from the shared config hash in place.
  # NOTE(review): presumably extract_attribute_from walks the remaining
  # keys as item attributes -- confirm before making this non-destructive.
  capture = xconfig.delete('capture')
  split   = xconfig.delete('split')

  # Local variable names are deliberately left as they were: the binding
  # handed to extract_attribute_from exposes them to that helper.
  urls.each do |url|
    logger.debug("Process: #{url}")
    page = agent.get(url)
    root = page.root
    root = root.at(capture) if capture

    if root
      root.search(split).each do |element|
        item = RSS::RDF::Item.new

        extract_attribute_from element, item, binding

        data << item
      end
    else
      logger.warn("Nothing to extract from #{url} (capture: #{capture.inspect}); skipping")
    end

    # The page was fetched either way, so the delay applies to skipped pages too.
    sleep wait
  end

  data
end