jmespath · mtdowling · Dec 7, 2014 · Dec 8, 2014 · Dec 21, 2014 · Dec 24, 2014
diff --git a/docs/jmespath-grammar.txt b/docs/jmespath-grammar.txt
@@ -0,0 +1,135 @@
+expr      = ws ( terminal / non-terminal ) ws
+root-expr = non-test / not / comparison
+non-test  = subexpr / index / flatten / wildcard-index / filter /
+            identifier / current-node / raw-string / literal /
+            root-multi-list / multi-hash / function / group / slice
+group     = begin-group expr end-group
+
+; Insignificant whitespace is allowed around expr nodes and around the
+; following structural tokens.
+begin-array     = ws %x5B ws    ; [ left square bracket
+begin-object    = ws %x7B ws    ; { left curly bracket
+end-array       = ws %x5D ws    ; ] right square bracket
+end-object      = ws %x7D ws    ; } right curly bracket
+begin-group     = ws %x28 ws    ; ( left parenthesis
+end-group       = ws %x29 ws    ; ) right parenthesis
+name-separator  = ws %x3A ws    ; : colon
+value-separator = ws %x2C ws    ; , comma
+decimal-point   = ws %x2E ws    ; .
+pipe-separator  = ws "|" ws
+or-separator    = ws "||" ws
+and-separator   = ws "&&" ws
+lt              = ws "<" ws
+lte             = ws "<=" ws
+gt              = ws ">" ws
+gte             = ws ">=" ws
+eq              = ws "==" ws
+ne              = ws "!=" ws
+expref-token    = "&" ws
+not-token       = "!" ws
+begin-filter    = "[?" ws
+flatten         = ws "[]" ws
+
+; Insignificant whitespace
+ws = *(%x20 / ; Space
+       %x09 / ; Horizontal tab
+       %x0A / ; Line feed or New line
+       %x0D) ; Carriage return
+
+; "&&" binds more tightly than "||"
+; "||" binds more tightly than "|".
+terminal     = pipe / or / and
+non-terminal = root-expr / wildcard-values
+pipe         = expr pipe-separator ( non-terminal / or / and )
+or           = ( non-terminal / and ) or-separator ( non-terminal / or / and )
+and          = non-terminal and-separator ( non-terminal / and )
+
+; subexpr handles most expressions that descend into nodes.
+subexpr            = object-subexpr / array-subexpr
+object-subexpr     = object-subexpr-lhs decimal-point object-subexpr-rhs
+object-subexpr-lhs = subexpr / index / flatten / wildcard-index /
+                     filter / identifier / current-node / literal /
+                     multi-hash / function / group / slice / wildcard-values
+object-subexpr-rhs = identifier / multi-list / multi-hash / function /
+                     wildcard-values
+array-subexpr      = array-subexpr-lhs array-subexpr-rhs
+array-subexpr-lhs  = subexpr / index / flatten / wildcard-index / filter /
+                     identifier / current-node / literal / root-multi-list /
+                     function / group / slice / wildcard-values
+array-subexpr-rhs  = index / slice / wildcard-index / flatten / filter
+
+; Array related rules
+index           = begin-array number end-array
+slice           = begin-array [number] name-separator
+                               [number] [name-separator [number] ]
+                               end-array
+wildcard-index   = begin-array "*" end-array
+filter           = begin-filter filter-condition end-array
+filter-condition = not / non-test / terminal / comparison
+
+not             = not-token ( non-test / not )
+comparison      = non-terminal comparator ( non-test / not / wildcard-values )
+comparator      = lt / lte / gt / gte / eq / ne
+
+root-multi-list = begin-array
+                  ( root-expr / multiple-values / terminal )
+                  end-array
+multi-list      = begin-array ( expr / multiple-values ) end-array
+multiple-values = expr 1*( value-separator expr )
+
+multi-hash      = begin-object ( keyval *( value-separator keyval ) ) end-object
+keyval          = identifier name-separator expr
+
+function        = unquoted-string arg-list
+arg-list        = begin-group [ arg *( value-separator arg ) ] end-group
+arg             = expr / expref
+
+expref          = expref-token expr
+wildcard-values = "*"
+current-node    = "@"
+
+number            = ["-"] int
+
+; Strings, identifiers, and characters.
+raw-string        = "'" *raw-string-char "'"
+raw-string-char   = (%x20-26 / %x28-5B / %x5D-10FFFF) / raw-string-escape
+raw-string-escape = escape ["'"]
+
+identifier        = unquoted-string / quoted-string
+unquoted-string   = ( ALPHA / "_" ) *( DIGIT / ALPHA / "_" )
+quoted-string     = DQUOTE *char DQUOTE
+char              = unescaped-char / escaped-char
+unescaped-char    = %x20-21 / %x23-5B / %x5D-10FFFF
+escape            = %x5C             ; "\"
+escaped-char      = escape (
+                     %x22 /          ; "    quotation mark  U+0022
+                     %x5C /          ; \    reverse solidus U+005C
+                     %x2F /          ; /    solidus         U+002F
+                     %x62 /          ; b    backspace       U+0008
+                     %x66 /          ; f    form feed       U+000C
+                     %x6E /          ; n    line feed       U+000A
+                     %x72 /          ; r    carriage return U+000D
+                     %x74 /          ; t    tab             U+0009
+                     %x75 4HEXDIG )  ; uXXXX                U+XXXX
+
+; Literal rules (e.g., "`[]`")
+literal           = "`" json-value "`"
+literal-char      = unescaped-literal / escaped-literal
+unescaped-literal = %x20-5f / %x61-10FFFF ; Any character except "`"
+escaped-literal   = escaped-char / ( escape "`" )
+
+; JSON related grammar (for literal and string values)
+json-value  = false / null / true / object / array
+json-value  =/ json-number / json-string
+json-string = DQUOTE *literal-char DQUOTE
+false       = %x66.61.6c.73.65   ; false
+null        = %x6e.75.6c.6c      ; null
+true        = %x74.72.75.65      ; true
+object      = begin-object [ member *( value-separator member ) ] end-object
+member      = json-string name-separator json-value
+array       = begin-array [ json-value *( value-separator json-value ) ] end-array
+json-number = ["-"] int [frac] [exp]
+digit1-9    = %x31-39 ; 1-9
+int         = "0" / ( digit1-9 *DIGIT )
+exp         = "e" [ "-" / "+" ] 1*DIGIT
+frac        = %x2E 1*DIGIT   ; "." with no surrounding whitespace
diff --git a/docs/specification.rst b/docs/specification.rst
@@ -26,103 +26,10 @@ language equivalent value.
 Grammar
 =======
 
-The grammar is specified using ABNF, as described in `RFC4234`_
+The :download:`JMESPath grammar <jmespath-grammar.txt>` is specified using ABNF,
+as described in `RFC4234`_.
 
-::
-
-    expression        = sub-expression / index-expression / or-expression / identifier
-    expression        =/ "*" / multi-select-list / multi-select-hash / literal
-    expression        =/ function-expression / pipe-expression
-    sub-expression    = expression "." ( identifier /
-                                         multi-select-list /
-                                         multi-select-hash /
-                                         function-expression /
-                                         "*" )
-    or-expression     = expression "||" expression
-    pipe-expression   = expression "|" expression
-    index-expression  = expression bracket-specifier / bracket-specifier
-    multi-select-list = "[" ( expression *( "," expression ) ) "]"
-    multi-select-hash = "{" ( keyval-expr *( "," keyval-expr ) ) "}"
-    keyval-expr       = identifier ":" expression
-    bracket-specifier = "[" (number / "*") "]" / "[]"
-    bracket-specifier =/ "[?" list-filter-expr "]"
-    list-filter-expr  = expression comparator expression
-    comparator        = "<" / "<=" / "==" / ">=" / ">" / "!="
-    function-expression = unquoted-string  (
-                            no-args  /
-                            one-or-more-args )
-    no-args             = "(" ")"
-    one-or-more-args    = "(" ( function-arg *( "," function-arg ) ) ")"
-    function-arg        = expression / current-node / expression-type
-    current-node        = "@"
-    expression-type     = "&" expression
-
-    literal           = "`" json-value "`"
-    literal           =/ "`" 1*(unescaped-literal / escaped-literal) "`"
-    unescaped-literal = %x20-21 /       ; space !
-                            %x23-5A /   ; # - [
-                            %x5D-5F /   ; ] ^ _
-                            %x61-7A     ; a-z
-                            %x7C-10FFFF ; |}~ ...
-    escaped-literal   = escaped-char / (escape %x60)
-    number            = ["-"]1*digit
-    digit             = %x30-39
-    identifier        = unquoted-string / quoted-string
-    unquoted-string   = (%x41-5A / %x61-7A / %x5F) *(  ; a-zA-Z_
-                            %x30-39  /  ; 0-9
-                            %x41-5A /  ; A-Z
-                            %x5F    /  ; _
-                            %x61-7A)   ; a-z
-    quoted-string     = quote 1*(unescaped-char / escaped-char) quote
-    unescaped-char    = %x20-21 / %x23-5B / %x5D-10FFFF
-    escape            = %x5C   ; Back slash: \
-    quote             = %x22   ; Double quote: '"'
-    escaped-char      = escape (
-                            %x22 /          ; "    quotation mark  U+0022
-                            %x5C /          ; \    reverse solidus U+005C
-                            %x2F /          ; /    solidus         U+002F
-                            %x62 /          ; b    backspace       U+0008
-                            %x66 /          ; f    form feed       U+000C
-                            %x6E /          ; n    line feed       U+000A
-                            %x72 /          ; r    carriage return U+000D
-                            %x74 /          ; t    tab             U+0009
-                            %x75 4HEXDIG )  ; uXXXX                U+XXXX
-
-    ; The ``json-value`` is any valid JSON value with the one exception that the
-    ; ``%x60`` character must be escaped.  While it's encouraged that implementations
-    ; use any existing JSON parser for this grammar rule (after handling the escaped
-    ; literal characters), the grammar rule is shown below for completeness::
-
-    json-value = false / null / true / json-object / json-array /
-                 json-number / json-quoted-string
-    false = %x66.61.6c.73.65   ; false
-    null  = %x6e.75.6c.6c      ; null
-    true  = %x74.72.75.65      ; true
-    json-quoted-string = %x22 1*(unescaped-literal / escaped-literal) %x22
-    begin-array     = ws %x5B ws  ; [ left square bracket
-    begin-object    = ws %x7B ws  ; { left curly bracket
-    end-array       = ws %x5D ws  ; ] right square bracket
-    end-object      = ws %x7D ws  ; } right curly bracket
-    name-separator  = ws %x3A ws  ; : colon
-    value-separator = ws %x2C ws  ; , comma
-    ws              = *(%x20 /              ; Space
-                        %x09 /              ; Horizontal tab
-                        %x0A /              ; Line feed or New line
-                        %x0D                ; Carriage return
-                       )
-    json-object = begin-object [ member *( value-separator member ) ] end-object
-    member = quoted-string name-separator json-value
-    json-array = begin-array [ json-value *( value-separator json-value ) ] end-array
-    json-number = [ minus ] int [ frac ] [ exp ]
-    decimal-point = %x2E       ; .
-    digit1-9 = %x31-39         ; 1-9
-    e = %x65 / %x45            ; e E
-    exp = e [ minus / plus ] 1*DIGIT
-    frac = decimal-point 1*DIGIT
-    int = zero / ( digit1-9 *DIGIT )
-    minus = %x2D               ; -
-    plus = %x2B                ; +
-    zero = %x30                ; 0
+.. literalinclude:: jmespath-grammar.txt
 
 .. _identifiers:
 
@@ -481,26 +388,29 @@ Literal Expressions
 
 ::
 
-    literal           = "`" json-value "`"
-    literal           =/ "`" 1*(unescaped-literal / escaped-literal) "`"
-    unescaped-literal = %x20-21 /       ; space !
-                            %x23-5A /   ; # - [
-                            %x5D-5F /   ; ] ^ _
-                            %x61-7A     ; a-z
-                            %x7C-10FFFF ; |}~ ...
+    literal           = "`" 1*(unescaped-literal / escaped-literal) "`"
+    unescaped-literal = %x20-5F / %x61-10FFFF ; any character except "`"
     escaped-literal   = escaped-char / (escape %x60)
 
 A literal expression is an expression that allows arbitrary JSON objects to be
-specified.  This is useful in filter expressions as well as multi select hashes
+specified. This is useful in filter expressions as well as multi select hashes
 (to create arbitrary key value pairs), but is allowed anywhere an expression is
-allowed.  The specification includes the ABNF for JSON, implementations should
-use an existing JSON parser to parse literal values.  Note that the ``\```
-character must now be escaped in a ``json-value`` which means implementations
-need to handle this case before passing the resulting string to a JSON parser.
-
-Note the second literal rule.  This is used to specify a string such that
-double quotes do not have to be included.  This means that the literal
-expression ``\`"foo"\``` is equivalent to ``\`foo\```.
+allowed.
+
+Implementations should use an existing JSON parser to parse literal values when
+a literal expression value is a valid JSON value based on the
+`JSON ABNF <http://www.ietf.org/rfc/rfc4627.txt>`_.
+
+.. note::
+
+    A backtick may be escaped as ``\``` in a literal expression, which means
+    that implementations need to unescape it before passing the resulting
+    string to a JSON parser.
+
+When a literal value is not a valid JSON value, implementations must add
+surrounding double quote characters to the value and then pass it to a JSON
+parser. This means that the literal expression ``\`"foo"\``` is equivalent to
+``\`foo\```.
 
 
 Examples