Commit

finally fixed indentation
damog committed Aug 3, 2014
1 parent 854b3c8 commit 7f4241f
Showing 3 changed files with 159 additions and 159 deletions.
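Since the diff is whitespace-only, git show -w 7f4241f (the -w flag makes git ignore whitespace when comparing) should report no textual changes, which is a quick way to confirm that only indentation moved.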
4 changes: 2 additions & 2 deletions feedbag.gemspec
@@ -1,11 +1,11 @@
# -*- encoding: utf-8 -*-

Gem::Specification.new do |s|
  s.name = %q{feedbag}
  s.version = "0.9.2"
  s.homepage = "http://github.com/damog/feedbag"
  s.rubyforge_project = "feedbag"

  s.authors = ["David Moreno", "Derek Willis"]
  s.date = %q{2013-12-07}
  s.description = %q{Ruby's favorite feed auto-discovery tool}
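The two changed lines in this hunk move only leading whitespace. As a quick sanity check that the spec still evaluates after the reindent, a hypothetical snippet run from the repository root:

  spec = Gem::Specification.load("feedbag.gemspec")
  spec.name          # => "feedbag"
  spec.version.to_s  # => "0.9.2"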
310 changes: 155 additions & 155 deletions lib/feedbag.rb
@@ -28,14 +28,14 @@

class Feedbag

  CONTENT_TYPES = [
    'application/x.atom+xml',
    'application/atom+xml',
    'application/xml',
    'text/xml',
    'application/rss+xml',
    'application/rdf+xml',
  ].freeze
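  # Editor's sketch, not in the original file: any response whose media
  # type appears in this frozen list is treated as a feed outright, e.g.
  #   Feedbag::CONTENT_TYPES.include?('application/rss+xml')  # => true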

  def self.feed?(url)
    new.feed?(url)
@@ -49,160 +49,160 @@ def initialize
    @feeds = []
  end

  def feed?(url)
    # use LWR::Simple.normalize some time
    url_uri = URI.parse(url)
    url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"
    url << "?#{url_uri.query}" if url_uri.query

    # hack:
    url.sub!(/^feed:\/\//, 'http://')

    res = Feedbag.find(url)
    if res.size == 1 and res.first == url
      return true
    else
      return false
    end
  end

  def find(url, args = {})
    url_uri = URI.parse(url)
    url = nil
    if url_uri.scheme.nil?
      url = "http://#{url_uri.to_s}"
    elsif url_uri.scheme == "feed"
      return self.add_feed(url_uri.to_s.sub(/^feed:\/\//, 'http://'), nil)
    else
      url = url_uri.to_s
    end
    #url = "#{url_uri.scheme or 'http'}://#{url_uri.host}#{url_uri.path}"

    # check if feed_validator is available
    begin
      require "feed_validator"
      v = W3C::FeedValidator.new
      v.validate_url(url)
      return self.add_feed(url, nil) if v.valid?
    rescue LoadError
      # scoo
    rescue REXML::ParseException
      # usually indicates timeout
      # TODO: actually find out timeout. use Terminator?
      # $stderr.puts "Feed looked like feed but might not have passed validation or timed out"
    rescue => ex
      $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}"
    end

    begin
      html = open(url) do |f|
        content_type = f.content_type.downcase
        if content_type == "application/octet-stream" # open failed
          content_type = f.meta["content-type"].gsub(/;.*$/, '')
        end
        if CONTENT_TYPES.include?(content_type)
          return self.add_feed(url, nil)
        end

        doc = Nokogiri::HTML(f.read)

        if doc.at("base") and doc.at("base")["href"]
          @base_uri = doc.at("base")["href"]
        else
          @base_uri = nil
        end

        # first with links
        (doc/"atom:link").each do |l|
          next unless l["rel"]
          if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and l["rel"].downcase == "self"
            self.add_feed(l["href"], url, @base_uri)
          end
        end

        (doc/"link").each do |l|
          next unless l["rel"]
          if l["type"] and CONTENT_TYPES.include?(l["type"].downcase.strip) and (l["rel"].downcase =~ /alternate/i or l["rel"] == "service.feed")
            self.add_feed(l["href"], url, @base_uri)
          end
        end

        (doc/"a").each do |a|
          next unless a["href"]
          if self.looks_like_feed?(a["href"]) and (a["href"] =~ /\// or a["href"] =~ /#{url_uri.host}/)
            self.add_feed(a["href"], url, @base_uri)
          end
        end

        (doc/"a").each do |a|
          next unless a["href"]
          if self.looks_like_feed?(a["href"])
            self.add_feed(a["href"], url, @base_uri)
          end
        end

        # Added support for feeds like http://tabtimes.com/tbfeed/mashable/full.xml
        if url.match(/.xml$/) and doc.root and doc.root["xml:base"] and doc.root["xml:base"].strip == url.strip
          self.add_feed(url, nil)
        end
      end
    rescue Timeout::Error => err
      $stderr.puts "Timeout error occurred with `#{url}: #{err}'"
    rescue OpenURI::HTTPError => the_error
      $stderr.puts "Error occurred with `#{url}': #{the_error}"
    rescue SocketError => err
      $stderr.puts "Socket error occurred with: `#{url}': #{err}"
    rescue => ex
      $stderr.puts "#{ex.class} error occurred with: `#{url}': #{ex.message}"
    ensure
      return @feeds
    end
  end
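  # Editor's note, not in the original file: the `ensure` above means find
  # always returns the accumulated @feeds array, even after a rescue. A
  # hypothetical call:
  #   Feedbag.find("http://example.com/blog/")
  #   # => ["http://example.com/blog/feed.xml"] (depends on the page markup)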

  def looks_like_feed?(url)
    if url =~ /(\.(rdf|xml|rdf|rss)$|feed=(rss|atom)|(atom|feed)\/?$)/i
      true
    else
      false
    end
  end
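  # Editor's sketch with hypothetical URLs against the pattern above:
  #   looks_like_feed?("http://example.com/index.rss")   # => true  (.rss suffix)
  #   looks_like_feed?("http://example.com/?feed=atom")  # => true  (feed= parameter)
  #   looks_like_feed?("http://example.com/about")       # => false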

  def add_feed(feed_url, orig_url, base_uri = nil)
    # puts "#{feed_url} - #{orig_url}"
    url = feed_url.sub(/^feed:/, '').strip

    if base_uri
      # url = base_uri + feed_url
      url = URI.parse(base_uri).merge(feed_url).to_s
    end

    begin
      uri = URI.parse(url)
    rescue
      puts "Error with `#{url}'"
      exit 1
    end
    unless uri.absolute?
      orig = URI.parse(orig_url)
      url = orig.merge(url).to_s
    end

    # verify url is really valid
    @feeds.push(url) unless @feeds.include?(url) # if self._is_http_valid(URI.parse(url), orig_url)
  end
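  # Editor's sketch of the URI.merge resolution used above, with
  # hypothetical values:
  #   URI.parse("http://example.com/blog/").merge("feed.xml").to_s
  #   # => "http://example.com/blog/feed.xml"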

  # not used. yet.
  def _is_http_valid(uri, orig_url)
    req = Net::HTTP.get_response(uri)
    orig_uri = URI.parse(orig_url)
    case req
    when Net::HTTPSuccess then
      return true
    else
      return false
    end
  end
end

if __FILE__ == $0
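The public entry points are untouched by this commit. A minimal usage sketch, with hypothetical URLs:

  require "feedbag"

  Feedbag.find("http://example.com/blog/")     # all feeds discovered on the page
  Feedbag.feed?("http://example.com/feed.xml") # true if the URL itself is a feed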
4 changes: 2 additions & 2 deletions test/feedbag_test.rb
@@ -1,7 +1,7 @@
require 'test_helper'

class FeedbagTest < Test::Unit::TestCase

  context "Feedbag.feed? should know that an RSS url is a feed" do
    setup do
      @rss_url = 'http://example.com/rss/'
@@ -11,7 +11,7 @@ class FeedbagTest < Test::Unit::TestCase
      assert Feedbag.feed?(@rss_url)
    end
  end

  context "Feedbag.feed? should know that an RSS url with parameters is a feed" do
    setup do
      @rss_url = "http://example.com/data?format=rss"
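Both hunks in this file are again whitespace-only. For reference, a hypothetical test in the same shoulda style (the real suite would also need to stub the HTTP request, which this sketch omits):

  context "Feedbag.feed? should know that a plain HTML url is not a feed" do
    setup do
      @html_url = "http://example.com/about"
    end

    should "return false" do
      assert !Feedbag.feed?(@html_url)
    end
  end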
