3
3
4
4
module OAI
5
5
module Harvester
6
-
7
6
class Harvest
7
+ DIRECTORY_LAYOUT = "%Y/%m" . freeze
8
8
9
9
def initialize ( config = nil , directory = nil , date = nil )
10
10
@config = config || Config . load
@@ -43,22 +43,27 @@ def harvest(site)
43
43
# Allow a from date to be passed in
44
44
opts [ :from ] = earliest ( opts [ :url ] ) unless opts [ :from ]
45
45
opts . delete ( :set ) if 'all' == opts [ :set ]
46
-
47
46
begin
48
47
# Connect, and download
49
48
file , records = call ( opts . delete ( :url ) , opts )
50
49
51
- # Move document to storage directory
52
- dir = File . join ( @directory , date_based_directory ( harvest_time ) )
53
- FileUtils . mkdir_p dir
54
- FileUtils . mv ( file . path ,
55
- File . join ( dir , "#{ site } -#{ filename ( Time . parse ( opts [ :from ] ) ,
56
- harvest_time ) } .xml.gz") )
50
+ # Move document to storage directory if configured
51
+ if @directory
52
+ directory_layout = @config . layouts [ site ] if @config . layouts
53
+ dir = File . join ( @directory , date_based_directory ( harvest_time , directory_layout ) )
54
+ FileUtils . mkdir_p dir
55
+ FileUtils . mv ( file . path ,
56
+ File . join ( dir , "#{ site } -#{ filename ( Time . parse ( opts [ :from ] ) ,
57
+ harvest_time ) } .xml.gz") )
58
+ else
59
+ puts "no configured destination for temp file" if @interactive
60
+ end
57
61
@config . sites [ site ] [ 'last' ] = harvest_time
58
- rescue
59
- raise $! unless $!. respond_to? ( :code )
60
- raise $! if not @interactive || "noRecordsMatch" != $!. code
61
- puts "No new records available"
62
+ rescue OAI ::NoMatchException
63
+ puts "No new records available" if @interactive
64
+ rescue OAI ::Exception => ex
65
+ raise ex if not @interactive
66
+ puts ex . message
62
67
end
63
68
end
64
69
@@ -69,15 +74,15 @@ def call(url, opts)
69
74
records = 0 ;
70
75
client = OAI ::Client . new ( url , :parser => @parser )
71
76
provider_config = client . identify
72
-
77
+
73
78
file = Tempfile . new ( 'oai_data' )
74
79
gz = Zlib ::GzipWriter . new ( file )
75
80
gz << "<?xml version=\" 1.0\" encoding=\" UTF-8\" ?>\n "
76
81
gz << "<records>"
77
82
begin
78
83
response = client . list_records ( options )
79
- get_records ( response . doc ) . each do |rec |
80
- gz << rec
84
+ response . each do |rec |
85
+ gz << rec . _source
81
86
records += 1
82
87
end
83
88
puts "#{ records } records retrieved" if @interactive
@@ -89,8 +94,8 @@ def call(url, opts)
89
94
puts "\n resumption token recieved, continuing" if @interactive
90
95
response = client . list_records ( :resumption_token =>
91
96
response . resumption_token )
92
- get_records ( response . doc ) . each do |rec |
93
- gz << rec
97
+ response . each do |rec |
98
+ gz << rec . _source
94
99
records += 1
95
100
end
96
101
puts "#{ records } records retrieved" if @interactive
@@ -118,8 +123,9 @@ def build_options_hash(site)
118
123
options
119
124
end
120
125
121
# Builds the date-based storage subdirectory name for a harvested file.
#
# time             - an object responding to #strftime (the harvest timestamp).
# directory_layout - optional strftime pattern for the subdirectory; when it
#                    is nil the class-wide Harvest::DIRECTORY_LAYOUT pattern
#                    is used instead.
#
# Returns the String produced by formatting +time+ with the chosen pattern.
def date_based_directory(time, directory_layout = nil)
  # Fall back to the class default only when the caller supplied no layout.
  directory_layout ||= Harvest::DIRECTORY_LAYOUT
  # strftime already yields a String, so no extra interpolation is needed.
  time.strftime(directory_layout)
end
124
130
125
131
def filename ( from_time , until_time )
0 commit comments