Skip to content

Commit

Permalink
Perform operations over raw bytes where possible.
Browse files Browse the repository at this point in the history
  • Loading branch information
m00natic committed Aug 29, 2018
1 parent 9e3e1cf commit 0c262a2
Show file tree
Hide file tree
Showing 6 changed files with 305 additions and 116 deletions.
28 changes: 28 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
Copyright 2018 Andrey Kotlarski

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 changes: 40 additions & 0 deletions README.org
Original file line number Diff line number Diff line change
@@ -1,3 +1,43 @@
* SQL-like operations over fixed field DBs

See [[https://m00natic.github.io/lisp/manual-jit.html][Uniform Structured Syntax, Metaprogramming and Run-time Compilation]]

This is a Common Lisp library which is to enable subset of SQL-like
operations over DBs with fields/columns with fixed number of ASCII
bytes. Thus far it works on files in such format.

** Declare DBs

The defspec macro is to be used to register a DB with respective
schema. Here's how the article example would look like:

#+BEGIN_SRC lisp
(defspec (recordS5 ;db name, letter case doesn't matter
:file ;db type, currently only :file is supported
"/path/to/records5.db") ;filename
;; declare fields in order, offsets are automatically assigned
2 ;2 unnamed filler bytes
(cxr 2) ;named field with 2 bytes
(type 1)
(subcode 3)
(date_eff 6)
(date_disc 6)
20
(commercial_name 16))
#+END_SRC

** Operations

Here's the article example:

#+BEGIN_SRC lisp
(select (cxr subcode commercial_name date_disc) ;list of selected fields
recordS5 ;db name
:where (and (like cxr "YY|XX") ;where clause
(or (and (like COMMERCIAL_NAME "PET")
(= type "C"))
(and (= type "F")
(= commercial_name "MEAL")))
(< "180620" date_disc)
(< date_eff date_disc)))
#+END_SRC
5 changes: 3 additions & 2 deletions fdbq.asd
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
:description "SQL-like querying over fixed-field DBs."
:author "Andrey Kotlarski <[email protected]>"
:license "BSD-3"
:version "0.0.1"
:depends-on (:cl-ppcre #:cl-string-match)
:version "0.0.2"
:depends-on (:cl-ppcre #:ascii-strings)
:serial t
:components ((:file "package")
(:file "spec")
(:file "filter")
(:file "fdbq")))
156 changes: 55 additions & 101 deletions fdbq.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -6,125 +6,79 @@
"Max chunk size to preload in bytes.")

(defmacro select (field-list db &key where)
"Generate selection procedure and run it."
"Select FIELD-LIST from DB with WHERE filter."
(let ((spec (get-spec db))) ;pull out the specification for this db
`(funcall (compile nil (lambda () ;guarantee execution of a compiled object
(declare (optimize (speed 3) (debug 0) (safety 0)
(compilation-speed 0)))
(do-lines (line ,spec) ;bind line to db entries
(do-lines (line ,spec buffer offset) ;bind buffer/line to db entries
;; if where is empty, condition is considered always satisfied
(when ,(or (gen-where where 'line spec) t)
,(gen-print-selection field-list 'line spec))))))))
(when ,(or (gen-where where 'line spec 'buffer 'offset) t)
,(gen-print-selection field-list 'line spec
'buffer 'offset))))))))

(defmacro do-lines ((line-var spec) &body body)
"Bind LINE-VAR to each line in SPEC specified file and execute BODY."
(defmacro do-lines ((line-var spec &optional buffer-var offset-var) &body body)
"Bind LINE-VAR to each line in SPEC specified source and execute BODY.
If BUFFER-VAR for byte array and OFFSET-VAR for offset within it are supplied,
expose them to BODY as well."
(when (eq :file (spec-type spec))
`(do-file-lines (,line-var ,spec ,buffer-var ,offset-var) ,@body)))

(defmacro do-file-lines ((line-var spec &optional buffer-var offset-var) &body body)
"Bind LINE-VAR to each line in SPEC specified file and execute BODY.
If BUFFER-VAR for byte array and OFFSET-VAR for offset within it are supplied,
expose them to BODY as well."
(let* ((entry-size (spec-size spec)) ;entry size is known
(line-size (1+ entry-size)) ;add 1 for newline
(buffer-size (* line-size (floor +max-buffer-size+ line-size)))
(ins (gensym)) ;make sure the file stream variable is not visible to the body
(buffer (gensym)) ;-//- for read buffer
(bytes (gensym)) ;-//- for number of bytes read
(offset (gensym))) ;-//- current line within buffer offset)
`(let ((,buffer (make-array ,buffer-size :element-type 'ascii:ub-char)) ;allocate read
(bytes (gensym)) ;-//- for number of bytes read
(use-only-line? (null buffer-var)))
(unless buffer-var
(setf buffer-var (gensym)
offset-var (gensym)))
`(let ((,buffer-var (make-array ,buffer-size :element-type 'ascii:ub-char)) ;allocate read
(,line-var (make-string ,entry-size :element-type 'base-char))) ;and line buffers
(declare (type (simple-array ascii:ub-char (,buffer-size)) ,buffer)
(declare (type (simple-array ascii:ub-char (,buffer-size)) ,buffer-var)
(type (simple-base-string ,entry-size) ,line-var) ;no need for newline
(dynamic-extent ,buffer ,line-var)) ;use stack allocation if possible
(dynamic-extent ,buffer-var ,line-var)) ;use stack allocation if possible
(with-open-file (,ins ,(spec-path spec) :direction :input
:element-type 'ascii:ub-char)
(loop for ,bytes fixnum = (read-sequence ,buffer ,ins) ;read as many lines
(loop for ,bytes fixnum = (read-sequence ,buffer-var ,ins) ;read as many lines
until (zerop ,bytes)
;; slide offset through the buffer
do (loop for ,offset fixnum from 0 below (* ,line-size
(floor ,bytes ,line-size))
do (loop for ,offset-var fixnum from 0 below (* ,line-size
(floor ,bytes ,line-size))
by ,line-size
;; fill line with the current window bytes while converting them
do (loop for i fixnum from 0 below ,entry-size
for j fixnum from ,offset
do (setf (aref ,line-var i) (code-char (aref ,buffer j))))
,@body)))))) ;the body only sees the line variable as before

(defun gen-where (where line-var spec)
"Create actual boolean tree for WHERE.
LINE-VAR is symbol representing the current line variable.
SPEC contains fields' offset and size information."
(when (consp where)
(let ((op (first where)))
(cond ((consp op) ;several expressions in a row, recurse
(cons (gen-where op line-var spec)
(gen-where (rest where) line-var spec)))
((member op '(and or not)) ;intermediate node, recurse
(cons op (gen-where (rest where) line-var spec)))
((member op '(= /= < <= > >= like)) ;leaf
(gen-field-op where line-var spec))
(t (error (format nil "Bad where clause: ~A" where)))))))

(defun translate-op (op &optional char?)
"Return string/char operation corresponding to OP."
(intern (concatenate 'string (if char? "CHAR" "STRING")
(symbol-name op))))

(defun simple-regex? (str)
"Check if string is not really a regex.
This a bit pessimistic."
(not (find-if-not #'alphanumericp str)))

(defmethod gen-field-clause ((op (eql 'like)) field1 field2 line-var)
"Generate code for regex clause."
(cond ((null (field-filter field2)) ;regex is taken from a field
`(cl-ppcre:scan (subseq ,line-var ,(db-field-offset field2)
:end ,(+ (db-field-offset field2)
(db-field-size field2)))
,line-var :start ,(db-field-offset field1)
:end ,(+ (db-field-offset field1)
(db-field-size field1))))
((simple-regex? (field-filter field2)) ;use plain search instead of regex
`(search ,(field-filter field2) ,line-var
:start1 ,(db-field-offset field2)
:end1 ,(+ (db-field-offset field2)
(db-field-size field2))
:start2 ,(db-field-offset field1)
:end2 ,(+ (db-field-offset field1)
(db-field-size field1))))
(t `(cl-ppcre:scan ,(field-filter field2)
,line-var :start ,(db-field-offset field1)
:end ,(+ (db-field-offset field1)
(db-field-size field1))))))

(defmethod gen-field-clause (op field1 field2 line-var)
"Generate code for a comparison clause."
(let ((size (min (db-field-size field1)
(db-field-size field2))))
(if (= 1 size) ;optimize single character comparison
(list (translate-op op t)
(if (field-filter field1) ;string literal?
(aref (field-filter field1) 0)
`(aref ,line-var ,(db-field-offset field1)))
(if (field-filter field2)
(aref (field-filter field2) 0)
`(aref ,line-var ,(db-field-offset field2))))
(list (translate-op op)
(or (field-filter field1) line-var)
(or (field-filter field2) line-var)
:start1 (db-field-offset field1)
:end1 (+ (db-field-offset field1) size)
:start2 (db-field-offset field2)
:end2 (+ (db-field-offset field2) size)))))

(defun gen-field-op (clause line-var spec)
"Generate code for a leaf WHERE clause."
(destructuring-bind (op field1 field2) clause
(gen-field-clause op (get-operand-traits field1 spec)
(get-operand-traits field2 spec) line-var)))
do (progn
,(when use-only-line?
`(loop for i fixnum from 0 below ,entry-size
for j fixnum from ,offset-var
do (setf (aref ,line-var i)
(code-char (aref ,buffer-var j)))))
,@body)))))))

(defun gen-print-selection (fields line-var spec)
(defun gen-print-selection (fields line-var spec
&optional buffer-var offset-var)
"Unroll selected FIELDS' print statements.
LINE-VAR is symbol representing the current line variable.
SPEC holds field offset details."
LINE-VAR is symbol representing the current line variable.
SPEC holds field offset details.
BUFFER-VAR is symbol representing the db buffer.
OFFSET-VAR is symbol representing the current offset in the db buffer."
`(progn
,@(loop for field in fields ;collect print statements in list and splice them
collect `(write-string ,line-var nil
:start ,(field-offset field spec)
:end ,(+ (field-offset field spec) ;constant fold
(field-size field spec))))
(format t "|")
,@(if buffer-var
(loop for field in fields ;collect print statements in list and splice them
collect `(loop for i fixnum from (+ ,offset-var
,(field-offset field spec))
below (+ ,offset-var ,(+ (field-offset field spec)
(field-size field spec)))
do (write-char (code-char (aref ,buffer-var i))))
collect '(format t "|"))
(loop for field in fields ;collect print statements in list and splice them
collect `(write-string ,line-var nil
:start ,(field-offset field spec)
:end ,(+ (field-offset field spec) ;constant fold
(field-size field spec)))
collect '(format t "|")))
(format t "~%")))
Loading

0 comments on commit 0c262a2

Please sign in to comment.