This repository was archived by the owner on Apr 10, 2024. It is now read-only.

Commit 27199d5

initial zap import

these files all originated from: blevesearch/bleve/index/scorch/segment/zap which was last modified on Aug 23, 2019 with git SHA: f1c9cb5659531140daeb291a3d9406b42821880d

23 files changed (+8224, −0 lines)

README.md

+158
@@ -0,0 +1,158 @@
# zap file format

Advanced ZAP File Format Documentation is [here](zap.md).

The file is written in the reverse order that we typically access data. This helps us write in one pass since later sections of the file require file offsets of things we've already written.

Current usage:

- mmap the entire file
- crc-32 bytes and version are in fixed position at end of the file
- reading remainder of footer could be version specific
- remainder of footer gives us (see the sketch after this list):
  - 3 important offsets (docValue, fields index and stored data index)
  - 2 important values (number of docs and chunk factor)
- field data is processed once and memoized onto the heap so that we never have to go back to disk for it
- access to stored data by doc number means first navigating to the stored data index, then accessing a fixed position offset into that slice, which gives us the actual address of the data. The first bytes of that section tell us the size of the data so that we know where it ends.
- access to all other indexed data follows this pattern:
  - first know the field name -> convert to id
  - next navigate to the term dictionary for that field
    - some operations stop here and do dictionary ops
  - next use the dictionary to navigate to the posting list for a specific term
    - walk the posting list
    - if necessary, walk posting details as we go
    - if location info is desired, consult the location bitmap to see if it is there

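To make the footer layout concrete, here is a minimal read-side sketch. It assumes only the field order and fixed widths described in this list and in the footer section below; the `footer` struct and `parseFooter` name are illustrative, not this package's API.

```go
package zapsketch

import "encoding/binary"

// Footer layout, reading backwards from the end of the file:
//   ... | numDocs | storedIdx | fieldsIdx | docValueIdx | chunkFactor | version | crc32 |
//          u64        u64         u64          u64           u32         u32       u32
type footer struct {
	numDocs        uint64
	storedIndexOff uint64
	fieldsIndexOff uint64
	docValueOff    uint64
	chunkFactor    uint32
	version        uint32
	crc            uint32
}

// parseFooter decodes the fixed-size footer from a mmapped segment.
func parseFooter(mem []byte) footer {
	n := len(mem)
	return footer{
		crc:            binary.BigEndian.Uint32(mem[n-4:]),
		version:        binary.BigEndian.Uint32(mem[n-8:]),
		chunkFactor:    binary.BigEndian.Uint32(mem[n-12:]),
		docValueOff:    binary.BigEndian.Uint64(mem[n-20:]),
		fieldsIndexOff: binary.BigEndian.Uint64(mem[n-28:]),
		storedIndexOff: binary.BigEndian.Uint64(mem[n-36:]),
		numDocs:        binary.BigEndian.Uint64(mem[n-44:]),
	}
}
```

Everything else in the file is reached by following the three offsets this yields.
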
## stored fields section

- for each document
  - preparation phase:
    - produce a slice of metadata bytes and data bytes
    - produce these slices in field id order
    - field value is appended to the data slice
    - metadata slice is varint encoded with the following values for each field value
      - field id (uint16)
      - field type (byte)
      - field value start offset in uncompressed data slice (uint64)
      - field value length (uint64)
      - field number of array positions (uint64)
      - one additional value for each array position (uint64)
    - compress the data slice using snappy
  - file writing phase:
    - remember the start offset for this document
    - write out metadata length (varint uint64)
    - write out compressed data length (varint uint64)
    - write out the metadata bytes
    - write out the compressed data bytes

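A sketch of the preparation phase above (the `docField` and `encodeStoredDoc` names are illustrative), using `encoding/binary` varints and `github.com/golang/snappy` for the compression step:

```go
package zapsketch

import (
	"bytes"
	"encoding/binary"

	"github.com/golang/snappy"
)

// docField is a hypothetical in-memory form of one stored field value.
type docField struct {
	fieldID   uint16
	fieldType byte
	value     []byte
	arrayPos  []uint64
}

// encodeStoredDoc produces the metadata varint stream and the
// snappy-compressed data slice for a single document.
func encodeStoredDoc(fields []docField) (meta, compressed []byte) {
	var metaBuf bytes.Buffer
	var data []byte
	tmp := make([]byte, binary.MaxVarintLen64)

	putUvarint := func(v uint64) {
		n := binary.PutUvarint(tmp, v)
		metaBuf.Write(tmp[:n])
	}

	for _, f := range fields { // fields must already be in field id order
		putUvarint(uint64(f.fieldID))       // field id
		putUvarint(uint64(f.fieldType))     // field type
		putUvarint(uint64(len(data)))       // start offset in uncompressed data slice
		putUvarint(uint64(len(f.value)))    // field value length
		putUvarint(uint64(len(f.arrayPos))) // number of array positions
		for _, pos := range f.arrayPos {
			putUvarint(pos) // one value per array position
		}
		data = append(data, f.value...)
	}

	return metaBuf.Bytes(), snappy.Encode(nil, data)
}
```

The writer then records the document's start offset and emits the two varint lengths followed by the metadata and compressed bytes, as in the file writing phase above.
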
## stored fields idx

- for each document
  - write start offset (remembered from previous section) of stored data (big endian uint64)

With this index and a known document number, we have direct access to all the stored field data.

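As a read-side sketch of that lookup (illustrative names, not the package's API): with the stored fields index offset from the footer, the address of any document's stored data is one fixed-width read away.

```go
package zapsketch

import "encoding/binary"

// storedDocAddr returns the file offset of a document's stored data, given
// the mmapped segment and the stored fields index offset from the footer.
func storedDocAddr(mem []byte, storedIndexOffset, docNum uint64) uint64 {
	entry := storedIndexOffset + 8*docNum // one big endian uint64 per document
	return binary.BigEndian.Uint64(mem[entry : entry+8])
}
```
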
## posting details (freq/norm) section

- for each posting list
  - produce a slice containing multiple consecutive chunks (each chunk is a varint stream)
  - produce a slice remembering offsets of where each chunk starts
  - preparation phase:
    - for each hit in the posting list
      - if this hit is in the next chunk, close out encoding of the last chunk and record the start offset of the next
      - encode term frequency (uint64)
      - encode norm factor (float32)
  - file writing phase:
    - remember the start position for this posting list's details
    - write out number of chunks that follow (varint uint64)
    - write out length of each chunk (each a varint uint64)
    - write out the byte slice containing all the chunk data

If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it.

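A sketch of that chunk arithmetic, assuming the per-chunk lengths have already been decoded from the varints that precede the chunk data (the helper name is illustrative):

```go
package zapsketch

// chunkBounds returns the start and end offsets (relative to the start of the
// chunk data) of the chunk that contains docNum, given the per-chunk lengths.
func chunkBounds(docNum, chunkFactor uint64, chunkLens []uint64) (start, end uint64) {
	target := docNum / chunkFactor // which chunk holds this doc
	for i := uint64(0); i < target; i++ {
		start += chunkLens[i]
	}
	return start, start + chunkLens[target]
}
```

Within that window the reader decodes varints hit by hit until it reaches the doc number it wants.
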
## posting details (location) section

- for each posting list
  - produce a slice containing multiple consecutive chunks (each chunk is a varint stream)
  - produce a slice remembering offsets of where each chunk starts
  - preparation phase:
    - for each hit in the posting list
      - if this hit is in the next chunk, close out encoding of the last chunk and record the start offset of the next
      - encode field (uint16)
      - encode field pos (uint64)
      - encode field start (uint64)
      - encode field end (uint64)
      - encode number of array positions to follow (uint64)
      - encode each array position (each uint64)
  - file writing phase:
    - remember the start position for this posting list's details
    - write out number of chunks that follow (varint uint64)
    - write out length of each chunk (each a varint uint64)
    - write out the byte slice containing all the chunk data

If you know the doc number you're interested in, this format lets you jump to the correct chunk (docNum/chunkFactor) directly and then seek within that chunk until you find it.

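For the location stream specifically, each hit expands to the varint sequence listed above; a minimal encoding sketch (illustrative helper, using `encoding/binary`):

```go
package zapsketch

import (
	"bytes"
	"encoding/binary"
)

// encodeLocation appends one location hit to a chunk's varint stream.
func encodeLocation(chunk *bytes.Buffer, field uint16, pos, start, end uint64, arrayPos []uint64) {
	tmp := make([]byte, binary.MaxVarintLen64)
	put := func(v uint64) {
		n := binary.PutUvarint(tmp, v)
		chunk.Write(tmp[:n])
	}
	put(uint64(field))         // field (uint16)
	put(pos)                   // term position within the field
	put(start)                 // byte start offset
	put(end)                   // byte end offset
	put(uint64(len(arrayPos))) // number of array positions to follow
	for _, ap := range arrayPos {
		put(ap) // each array position
	}
}
```
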
## postings list section

- for each posting list
  - preparation phase:
    - encode roaring bitmap posting list to bytes (so we know the length)
  - file writing phase:
    - remember the start position for this posting list
    - write freq/norm details offset (remembered from previous, as varint uint64)
    - write location details offset (remembered from previous, as varint uint64)
    - write length of encoded roaring bitmap
    - write the serialized roaring bitmap data

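A sketch of writing one postings list entry in the order listed above, assuming `github.com/RoaringBitmap/roaring` for the bitmap; the helper and buffer handling are illustrative.

```go
package zapsketch

import (
	"bytes"
	"encoding/binary"

	"github.com/RoaringBitmap/roaring"
)

// writePostingsEntry writes one postings list: freq/norm details offset,
// location details offset, bitmap length, then the serialized bitmap bytes.
func writePostingsEntry(w *bytes.Buffer, freqOffset, locOffset uint64, postings *roaring.Bitmap) (start int, err error) {
	start = w.Len() // remember the start position for this posting list

	bmBytes, err := postings.ToBytes() // serialize first so we know the length
	if err != nil {
		return 0, err
	}

	tmp := make([]byte, binary.MaxVarintLen64)
	for _, v := range []uint64{freqOffset, locOffset, uint64(len(bmBytes))} {
		n := binary.PutUvarint(tmp, v)
		w.Write(tmp[:n])
	}
	_, err = w.Write(bmBytes)
	return start, err
}
```
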
## dictionary

- for each field
  - preparation phase:
    - encode vellum FST with dictionary data pointing to file offset of posting list (remembered from previous)
  - file writing phase:
    - remember the start position of this persistDictionary
    - write length of vellum data (varint uint64)
    - write out vellum data

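A sketch of the dictionary build using `github.com/couchbase/vellum` (the FST library named above): terms must be inserted in sorted byte order, and each value is that term's posting list file offset. The surrounding buffer handling is illustrative.

```go
package zapsketch

import (
	"bytes"

	"github.com/couchbase/vellum"
)

// buildDictionary encodes a field's term dictionary as a vellum FST whose
// values are the file offsets of each term's posting list.
func buildDictionary(postingsOffsets map[string]uint64, sortedTerms []string) ([]byte, error) {
	var buf bytes.Buffer
	builder, err := vellum.New(&buf, nil)
	if err != nil {
		return nil, err
	}
	for _, term := range sortedTerms { // vellum requires sorted insertion order
		if err := builder.Insert([]byte(term), postingsOffsets[term]); err != nil {
			return nil, err
		}
	}
	if err := builder.Close(); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
```

The file writing phase then records the start position, writes the vellum data length as a varint, and writes the bytes.
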
## fields section

- for each field
  - file writing phase:
    - remember start offset for each field
    - write dictionary address (remembered from previous) (varint uint64)
    - write length of field name (varint uint64)
    - write field name bytes

## fields idx

- for each field
  - file writing phase:
    - write big endian uint64 of start offset for each field

NOTE: currently we don't know or record the length of this fields index. Instead we rely on the fact that we know it immediately precedes a footer of known size.

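Putting the fields section and the fields idx together, a read-side sketch (illustrative names): the fields idx gives a fixed-width jump per field id, and the entry it points at holds the dictionary address followed by the field name.

```go
package zapsketch

import "encoding/binary"

// fieldEntry resolves a field id to its dictionary address and name, given
// the mmapped segment and the fields index offset from the footer.
func fieldEntry(mem []byte, fieldsIndexOffset, fieldID uint64) (dictAddr uint64, name string) {
	// fields idx: one big endian uint64 per field, pointing at the field's entry
	entry := binary.BigEndian.Uint64(mem[fieldsIndexOffset+8*fieldID:])

	// fields section entry: dictionary address, name length, then name bytes
	dictAddr, n := binary.Uvarint(mem[entry:])
	nameLen, m := binary.Uvarint(mem[entry+uint64(n):])
	nameStart := entry + uint64(n) + uint64(m)
	return dictAddr, string(mem[nameStart : nameStart+nameLen])
}
```
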
## fields DocValue

- for each field
  - preparation phase:
    - produce a slice containing multiple consecutive chunks, where each chunk is composed of a meta section followed by compressed columnar field data
    - produce a slice remembering the length of each chunk
  - file writing phase:
    - remember the start position of this first field DocValue offset in the footer
    - write out number of chunks that follow (varint uint64)
    - write out length of each chunk (each a varint uint64)
    - write out the byte slice containing all the chunk data

NOTE: currently the meta header inside each chunk records the location offsets and sizes of the data pertaining to a given docID, and read operations leverage that meta information to extract the document-specific data from the file.

## footer

- file writing phase
  - write number of docs (big endian uint64)
  - write stored field index location (big endian uint64)
  - write field index location (big endian uint64)
  - write field docValue location (big endian uint64)
  - write out chunk factor (big endian uint32)
  - write out version (big endian uint32)
  - write out file CRC of everything preceding this (big endian uint32)

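A write-side sketch of the footer, mirroring the list above. It folds the footer fields themselves into the running CRC so the final value covers everything that precedes it; the helper name and the choice of the IEEE polynomial are assumptions of the sketch, not a statement about the real writer.

```go
package zapsketch

import (
	"encoding/binary"
	"hash/crc32"
	"io"
)

// writeFooter emits the fixed-size footer. runningCRC is the CRC-32 of
// everything already written to w.
func writeFooter(w io.Writer, runningCRC uint32, numDocs, storedIndexOffset,
	fieldsIndexOffset, docValueOffset uint64, chunkFactor, version uint32) error {
	buf := make([]byte, 0, 44) // 4 uint64 fields + 3 uint32 fields

	var b8 [8]byte
	for _, v := range []uint64{numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset} {
		binary.BigEndian.PutUint64(b8[:], v)
		buf = append(buf, b8[:]...)
	}
	var b4 [4]byte
	for _, v := range []uint32{chunkFactor, version} {
		binary.BigEndian.PutUint32(b4[:], v)
		buf = append(buf, b4[:]...)
	}

	// CRC of everything preceding it, including the footer bytes built above.
	crc := crc32.Update(runningCRC, crc32.IEEETable, buf)
	binary.BigEndian.PutUint32(b4[:], crc)
	buf = append(buf, b4[:]...)

	_, err := w.Write(buf)
	return err
}
```
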

build.go

+151
@@ -0,0 +1,151 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package zap

import (
	"bufio"
	"github.com/couchbase/vellum"
	"math"
	"os"
)

const Version uint32 = 11

const Type string = "zap"

const fieldNotUninverted = math.MaxUint64

// PersistSegmentBase persists SegmentBase in the zap file format.
func PersistSegmentBase(sb *SegmentBase, path string) error {
	flag := os.O_RDWR | os.O_CREATE

	f, err := os.OpenFile(path, flag, 0600)
	if err != nil {
		return err
	}

	cleanup := func() {
		_ = f.Close()
		_ = os.Remove(path)
	}

	br := bufio.NewWriter(f)

	_, err = br.Write(sb.mem)
	if err != nil {
		cleanup()
		return err
	}

	err = persistFooter(sb.numDocs, sb.storedIndexOffset, sb.fieldsIndexOffset, sb.docValueOffset,
		sb.chunkFactor, sb.memCRC, br)
	if err != nil {
		cleanup()
		return err
	}

	err = br.Flush()
	if err != nil {
		cleanup()
		return err
	}

	err = f.Sync()
	if err != nil {
		cleanup()
		return err
	}

	err = f.Close()
	if err != nil {
		cleanup()
		return err
	}

	return nil
}

func persistStoredFieldValues(fieldID int,
	storedFieldValues [][]byte, stf []byte, spf [][]uint64,
	curr int, metaEncode varintEncoder, data []byte) (
	int, []byte, error) {
	for i := 0; i < len(storedFieldValues); i++ {
		// encode field
		_, err := metaEncode(uint64(fieldID))
		if err != nil {
			return 0, nil, err
		}
		// encode type
		_, err = metaEncode(uint64(stf[i]))
		if err != nil {
			return 0, nil, err
		}
		// encode start offset
		_, err = metaEncode(uint64(curr))
		if err != nil {
			return 0, nil, err
		}
		// encode len
		_, err = metaEncode(uint64(len(storedFieldValues[i])))
		if err != nil {
			return 0, nil, err
		}
		// encode number of array pos
		_, err = metaEncode(uint64(len(spf[i])))
		if err != nil {
			return 0, nil, err
		}
		// encode all array positions
		for _, pos := range spf[i] {
			_, err = metaEncode(pos)
			if err != nil {
				return 0, nil, err
			}
		}

		data = append(data, storedFieldValues[i]...)
		curr += len(storedFieldValues[i])
	}

	return curr, data, nil
}

func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
	fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64,
	storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64,
	dictLocs []uint64) (*SegmentBase, error) {
	sb := &SegmentBase{
		mem:               mem,
		memCRC:            memCRC,
		chunkFactor:       chunkFactor,
		fieldsMap:         fieldsMap,
		fieldsInv:         fieldsInv,
		numDocs:           numDocs,
		storedIndexOffset: storedIndexOffset,
		fieldsIndexOffset: fieldsIndexOffset,
		docValueOffset:    docValueOffset,
		dictLocs:          dictLocs,
		fieldDvReaders:    make(map[uint16]*docValueReader),
		fieldFSTs:         make(map[uint16]*vellum.FST),
	}
	sb.updateSize()

	err := sb.loadDvReaders()
	if err != nil {
		return nil, err
	}

	return sb, nil
}
