@@ -1,13 +1,17 @@
//! On-disk storage

+ mod column;
+ mod rowset;
+
use std::collections::HashMap;
use std::path::PathBuf;
+ use std::sync::atomic::AtomicU32;
use std::sync::{Arc, RwLock};

use anyhow::anyhow;
- use bytes::{Buf, BufMut};

- use crate::array::{Array, ArrayBuilder, ArrayImpl, DataChunk, I32Array, I32ArrayBuilder};
+ use self::rowset::{DiskRowset, RowSetBuilder};
+ use crate::array::DataChunk;
use crate::catalog::{ColumnDesc, TableRefId};

/// The error type of storage operations.
@@ -26,13 +30,16 @@ pub struct DiskStorage {
    /// All tables in the current storage engine.
    tables: RwLock<HashMap<TableRefId, StorageTableRef>>,

+     /// Generator for RowSet id.
+     rowset_id_generator: Arc<AtomicU32>,
+
    /// The storage options.
    options: Arc<StorageOptions>,
}

pub struct StorageOptions {
    /// The directory of the storage
-     base_path: PathBuf,
+     pub base_path: PathBuf,
}

pub fn err(error: impl Into<anyhow::Error>) -> StorageError {
@@ -49,22 +56,21 @@ pub struct DiskTable {

    /// The storage options.
    options: Arc<StorageOptions>,
- }

- impl Default for DiskStorage {
-     fn default() -> Self {
-         Self::new()
-     }
+     /// Generator for RowSet id.
+     rowset_id_generator: Arc<AtomicU32>,
+
+     /// RowSets in the table
+     rowsets: RwLock<Vec<DiskRowset>>,
}

impl DiskStorage {
    /// Create a new on-disk storage.
-     pub fn new() -> Self {
+     pub fn new(options: StorageOptions) -> Self {
        DiskStorage {
            tables: RwLock::new(HashMap::new()),
-             options: Arc::new(StorageOptions {
-                 base_path: "risinglight.db".into(),
-             }),
+             options: Arc::new(options),
+             rowset_id_generator: Arc::new(AtomicU32::new(0)),
        }
    }

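With the `Default` impl removed above, callers now construct the storage with explicit options. A minimal construction sketch, using only what this hunk shows (the "risinglight.db" path just mirrors the previously hardcoded default and is not required by the new API):

let storage = DiskStorage::new(StorageOptions {
    // Any directory works; this value only mirrors the old built-in default.
    base_path: "risinglight.db".into(),
});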
@@ -75,6 +81,8 @@ impl DiskStorage {
            id,
            options: self.options.clone(),
            column_descs: column_descs.into(),
+             rowsets: RwLock::new(Vec::new()),
+             rowset_id_generator: self.rowset_id_generator.clone(),
        };
        let res = tables.insert(id, table.into());
        if res.is_some() {
@@ -93,61 +101,104 @@ impl DiskStorage {
    }
}

- /// Encode an `I32Array` into a `Vec<u8>`.
- fn encode_int32_column(a: &I32Array) -> StorageResult<Vec<u8>> {
-     let mut buffer = Vec::with_capacity(a.len() * 4);
-     for item in a.iter() {
-         if let Some(item) = item {
-             buffer.put_i32_le(*item);
-         } else {
-             return Err(anyhow!("nullable encoding not supported!").into());
-         }
+ impl DiskTable {
+     /// Start a transaction which only contains write.
+     pub async fn write(self: &Arc<Self>) -> StorageResult<DiskTransaction> {
+         let rowsets = self.rowsets.read().unwrap();
+         Ok(DiskTransaction {
+             read_only: false,
+             table: self.clone(),
+             rowset_snapshot: rowsets.clone(),
+             builder: None,
+             finished: false,
+         })
    }
-     Ok(buffer)
- }

- fn decode_int32_column(mut data: &[u8]) -> StorageResult<I32Array> {
-     let mut builder = I32ArrayBuilder::with_capacity(data.len() / 4);
-     while data.has_remaining() {
-         builder.push(Some(&data.get_i32_le()));
+     /// Start a transaction which only contains read.
+     pub async fn read(self: &Arc<Self>) -> StorageResult<DiskTransaction> {
+         let rowsets = self.rowsets.read().unwrap();
+         Ok(DiskTransaction {
+             read_only: true,
+             table: self.clone(),
+             rowset_snapshot: rowsets.clone(),
+             builder: None,
+             finished: false,
+         })
    }
-     Ok(builder.finish())
- }

- impl DiskTable {
-     fn table_path(&self) -> PathBuf {
+     pub fn table_path(&self) -> PathBuf {
        self.options.base_path.join(self.id.table_id.to_string())
    }

-     fn column_path(&self, column_id: usize) -> PathBuf {
-         self.table_path().join(format!("{}.col", column_id))
+     pub fn rowset_path_of(&self, rowset_id: u32) -> PathBuf {
+         self.table_path().join(rowset_id.to_string())
    }
+ }
+
+ pub struct DiskTransaction {
+     /// If this txn is read only.
+     read_only: bool,
+
+     /// Reference to table object
+     table: Arc<DiskTable>,
+
+     /// Current snapshot of RowSets
+     rowset_snapshot: Vec<DiskRowset>,
+
+     /// Builder for the RowSet
+     builder: Option<RowSetBuilder>,

150
151
+ /// Indicates whether the transaction is committed or aborted. If
152
+ /// the [`SecondaryTransaction`] object is dropped without finishing,
153
+ /// the transaction will panic.
154
+ finished : bool ,
155
+ }
156
+
157
+ impl Drop for DiskTransaction {
158
+ fn drop ( & mut self ) {
159
+ if !self . finished {
160
+ warn ! ( "Transaction dropped without committing or aborting" ) ;
161
+ }
162
+ }
163
+ }
164
+
165
+ impl DiskTransaction {
126
166
/// Append a chunk to the table.
127
- pub async fn append ( & self , chunk : DataChunk ) -> StorageResult < ( ) > {
128
- for ( idx, column) in chunk. arrays ( ) . iter ( ) . enumerate ( ) {
129
- if let ArrayImpl :: Int32 ( column) = column {
130
- let column_path = self . column_path ( idx) ;
131
- let data = encode_int32_column ( column) ?;
132
- tokio:: fs:: create_dir_all ( column_path. parent ( ) . unwrap ( ) )
133
- . await
134
- . map_err ( err) ?;
135
- tokio:: fs:: write ( column_path, data) . await . map_err ( err) ?;
136
- } else {
137
- return Err ( anyhow ! ( "unsupported column type" ) . into ( ) ) ;
138
- }
167
+ pub async fn append ( & mut self , chunk : DataChunk ) -> StorageResult < ( ) > {
168
+ if self . read_only {
169
+ return Err ( anyhow ! ( "cannot append chunks in read only txn!" ) . into ( ) ) ;
170
+ }
171
+ if self . builder . is_none ( ) {
172
+ self . builder = Some ( RowSetBuilder :: new ( self . table . column_descs . clone ( ) ) ) ;
139
173
}
174
+ let builder = self . builder . as_mut ( ) . unwrap ( ) ;
175
+
176
+ builder. append ( chunk) ?;
177
+
178
+ Ok ( ( ) )
179
+ }
180
+
181
+ pub async fn commit ( mut self ) -> StorageResult < ( ) > {
182
+ self . finished = true ;
183
+
184
+ if let Some ( builder) = self . builder . take ( ) {
185
+ use std:: sync:: atomic:: Ordering :: SeqCst ;
186
+ let rowset_id = self . table . rowset_id_generator . fetch_add ( 1 , SeqCst ) ;
187
+ let rowset_path = self
188
+ . table
189
+ . options
190
+ . base_path
191
+ . join ( self . table . rowset_path_of ( rowset_id) ) ;
192
+ let rowset = builder. flush ( rowset_id, rowset_path) . await ?;
193
+ let mut rowsets = self . table . rowsets . write ( ) . unwrap ( ) ;
194
+ rowsets. push ( rowset) ;
195
+ }
196
+
140
197
Ok ( ( ) )
141
198
}
142
199
143
200
/// Get all chunks of the table.
144
201
pub async fn all_chunks ( & self ) -> StorageResult < Vec < DataChunk > > {
145
- let mut columns = vec ! [ ] ;
146
- for ( idx, _) in self . column_descs . iter ( ) . enumerate ( ) {
147
- let column_path = self . column_path ( idx) ;
148
- let data = tokio:: fs:: read ( column_path) . await . map_err ( err) ?;
149
- columns. push ( decode_int32_column ( & data) ?) ;
202
+ let mut chunks = vec ! [ ] ;
203
+ for rowset in & self . rowset_snapshot {
204
+ chunks. push ( rowset. as_chunk ( ) . await ?) ;
150
205
}
151
- Ok ( vec ! [ columns . into_iter ( ) . map ( ArrayImpl :: Int32 ) . collect ( ) ] )
206
+ Ok ( chunks )
152
207
}
153
208
}
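For reference, a rough usage sketch of the new transactional API, assuming a `table: Arc<DiskTable>` and a `chunk: DataChunk` are already in scope and the caller is an async fn returning `StorageResult<()>` (`write`/`read` take `self: &Arc<Self>`):

// Write path: chunks are staged in a RowSetBuilder and flushed on commit.
let mut txn = table.write().await?;
txn.append(chunk).await?;
txn.commit().await?; // assigns a fresh rowset id and persists the RowSet

// Read path: reads see the RowSet snapshot taken when the txn started.
let read_txn = table.read().await?;
let chunks = read_txn.all_chunks().await?;
read_txn.commit().await?; // mark the txn finished so Drop does not warn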