Skip to content

Commit 7a636dc

Browse files
committed
add test cases for harvester
- move the DIRECTORY_LAYOUT global into the OAI::Harvester::Harvest class and make it configurable
- delegate record parsing to the ListRecords response in the harvester
- do nothing with the harvested tempfile if no storage directory is configured
- use application exceptions in the harvester
1 parent 0722417 commit 7a636dc

File tree

5 files changed

+71
-21
lines changed

5 files changed

+71
-21
lines changed

Rakefile

+7
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ namespace :test do
3434
t.warning = false
3535
end
3636

37+
# Defines the 'harvester' test task, which runs every test case file
# matching test/harvester/tc_*.rb with Ruby warnings switched off.
Rake::TestTask.new('harvester') do |task|
  task.libs << %w[lib test/harvester]
  task.pattern = 'test/harvester/tc_*.rb'
  # task.verbose = true
  task.warning = false
end
43+
3744
Rake::TestTask.new('provider') do |t|
3845
t.libs << ['lib', 'test/provider']
3946
t.pattern = 'test/provider/tc_*.rb'

bin/oai

-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55

66
require 'optparse'
77

8-
DIRECTORY_LAYOUT = "%Y/%m".freeze
9-
108
require 'oai/harvester'
119

1210
include OAI::Harvester

lib/oai/harvester/harvest.rb

+25-19
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33

44
module OAI
55
module Harvester
6-
76
class Harvest
7+
DIRECTORY_LAYOUT = "%Y/%m".freeze
88

99
def initialize(config = nil, directory = nil, date = nil)
1010
@config = config || Config.load
@@ -43,22 +43,27 @@ def harvest(site)
4343
# Allow a from date to be passed in
4444
opts[:from] = earliest(opts[:url]) unless opts[:from]
4545
opts.delete(:set) if 'all' == opts[:set]
46-
4746
begin
4847
# Connect, and download
4948
file, records = call(opts.delete(:url), opts)
5049

51-
# Move document to storage directory
52-
dir = File.join(@directory, date_based_directory(harvest_time))
53-
FileUtils.mkdir_p dir
54-
FileUtils.mv(file.path,
55-
File.join(dir, "#{site}-#{filename(Time.parse(opts[:from]),
56-
harvest_time)}.xml.gz"))
50+
# Move document to storage directory if configured
51+
if @directory
52+
directory_layout = @config.layouts[site] if @config.layouts
53+
dir = File.join(@directory, date_based_directory(harvest_time, directory_layout))
54+
FileUtils.mkdir_p dir
55+
FileUtils.mv(file.path,
56+
File.join(dir, "#{site}-#{filename(Time.parse(opts[:from]),
57+
harvest_time)}.xml.gz"))
58+
else
59+
puts "no configured destination for temp file" if @interactive
60+
end
5761
@config.sites[site]['last'] = harvest_time
58-
rescue
59-
raise $! unless $!.respond_to?(:code)
60-
raise $! if not @interactive || "noRecordsMatch" != $!.code
61-
puts "No new records available"
62+
rescue OAI::NoMatchException
63+
puts "No new records available" if @interactive
64+
rescue OAI::Exception => ex
65+
raise ex if not @interactive
66+
puts ex.message
6267
end
6368
end
6469

@@ -69,15 +74,15 @@ def call(url, opts)
6974
records = 0;
7075
client = OAI::Client.new(url, :parser => @parser)
7176
provider_config = client.identify
72-
77+
7378
file = Tempfile.new('oai_data')
7479
gz = Zlib::GzipWriter.new(file)
7580
gz << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
7681
gz << "<records>"
7782
begin
7883
response = client.list_records(options)
79-
get_records(response.doc).each do |rec|
80-
gz << rec
84+
response.each do |rec|
85+
gz << rec._source
8186
records += 1
8287
end
8388
puts "#{records} records retrieved" if @interactive
@@ -89,8 +94,8 @@ def call(url, opts)
8994
puts "\nresumption token recieved, continuing" if @interactive
9095
response = client.list_records(:resumption_token =>
9196
response.resumption_token)
92-
get_records(response.doc).each do |rec|
93-
gz << rec
97+
response.each do |rec|
98+
gz << rec._source
9499
records += 1
95100
end
96101
puts "#{records} records retrieved" if @interactive
@@ -118,8 +123,9 @@ def build_options_hash(site)
118123
options
119124
end
120125

121-
def date_based_directory(time)
122-
"#{time.strftime(DIRECTORY_LAYOUT)}"
126+
# Formats a time as a relative, date-based directory name.
#
# time             - an object responding to #strftime (the harvest time).
# directory_layout - optional strftime pattern; when nil (or false) the
#                    class-wide Harvest::DIRECTORY_LAYOUT is used instead.
#
# Returns the String produced by applying the pattern to +time+.
def date_based_directory(time, directory_layout = nil)
  # strftime already returns a String, so no interpolation is needed.
  time.strftime(directory_layout || Harvest::DIRECTORY_LAYOUT)
end
124130

125131
def filename(from_time, until_time)

test/harvester/tc_harvest.rb

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
require 'test_helper_harvester'
2+
3+
# Exercises OAI::Harvester::Harvest against an OAI provider expected at
# PROVIDER_URL (presumably started by the provider helper that
# test_helper_harvester loads -- confirm). No storage directory is passed
# to the harvester, so only the recorded 'last' harvest time is checked.
class HarvestTest < Test::Unit::TestCase
  # Fixture timestamps; tests below use LATEST_FIXTURE as the datestamp of
  # the newest fixture record. EARLIEST_FIXTURE is not referenced by them.
  EARLIEST_FIXTURE = "1998-05-02T04:00:00Z"
  LATEST_FIXTURE = "2005-12-25T05:00:00Z"
  PROVIDER_URL = 'http://localhost:3333/oai'

  # A harvest without a from date records a 'last' time that is a Time and
  # is no earlier than the moment the test started.
  def test_harvest
    started = Time.now.utc
    config = build_config
    OAI::Harvester::Harvest.new(config).start
    last = last_harvest(config)
    assert_kind_of Time, last
    assert last >= started, "#{last} < #{started}"
  end

  # Harvesting from exactly the latest fixture timestamp still records a
  # fresh 'last' time.
  def test_harvest_from_last
    from_value = Time.parse(LATEST_FIXTURE).utc
    now = Time.now.utc
    config = build_config
    OAI::Harvester::Harvest.new(config, nil, from_value).start
    last = last_harvest(config)
    assert last >= now, "#{last} < #{now}"
  end

  # Harvesting from one second after the latest fixture timestamp leaves
  # 'last' unset.
  def test_harvest_after_last
    from_value = Time.parse(LATEST_FIXTURE).utc + 1
    config = build_config
    OAI::Harvester::Harvest.new(config, nil, from_value).start
    assert_nil last_harvest(config)
  end

  private

  # Builds a minimal harvester config holding the single 'test' site.
  def build_config
    OpenStruct.new(sites: { 'test' => { 'url' => PROVIDER_URL } })
  end

  # Reads the harvest time recorded for the 'test' site, or nil if unset.
  def last_harvest(config)
    config.sites.dig('test', 'last')
  end
end
33+
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
require 'oai'
2+
require 'oai/harvester'
3+
require 'test/unit'
4+
5+
require File.dirname(__FILE__) + '/../client/helpers/provider'
6+
require File.dirname(__FILE__) + '/../client/helpers/test_wrapper'

0 commit comments

Comments
 (0)