@@ -64,12 +64,13 @@ class Loc(NamedTuple):
64
64
65
65
def __add__(self, other: object) -> Loc:
    """Return the line,column after the additional text.

    Each character of ``other`` advances the column by one. A newline
    first moves to the next line and resets the column to 0, so the
    position right after a newline is column 1 of the following line.

    Operands that are not strings yield ``NotImplemented`` so that Python
    can fall back to the reflected operation or raise ``TypeError``.
    """
    if not isinstance(other, str):
        return NotImplemented

    line, col = self.line, self.col
    for c in other:
        if c == '\n':
            line += 1
            col = 0
        # Applied to every character, the newline included: this is what
        # makes the position after a newline column 1 rather than 0.
        col += 1
    return Loc(line, col)
75
76
@@ -190,23 +191,36 @@ class State(Enum):
190
191
191
192
192
193
class LocationIterator(Iterator[str]):
    """A string iterator which tracks the line and column of its characters.

    The tracked position always refers to the character most recently
    returned by ``next()``. A newline character is counted as the last
    column of the line it terminates; the character that follows it is
    reported at column 1 of the next line.
    """

    _line: int  # line of the last returned character
    _col: int  # column of the last returned character
    _iter: Iterator[str]  # underlying character source
    _nextline: bool  # True when the last returned character was a newline

    def __init__(self, text: Iterable[str], line: int = 1, col: int = 0) -> None:
        """Wrap ``text``, starting the position tracking at ``line``/``col``.

        ``line`` and ``col`` describe the position *before* the first
        character, i.e. the first character returned is reported at
        ``col + 1`` on ``line``.
        """
        self._iter = iter(text)
        self._line = line
        self._col = col
        self._nextline = False

    def __next__(self) -> str:
        """Return the next character and update the tracked position.

        Raises ``StopIteration`` when the underlying iterator is exhausted;
        the tracked position is left unchanged in that case.
        """
        la = next(self._iter)
        if self._nextline:
            # The previous character ended its line, so this one opens a new line.
            self._line += 1
            self._col = 1
        else:
            self._col += 1
        # The line advance is deferred to the following call so that the
        # newline itself is still reported on the line it terminates.
        self._nextline = la == '\n'
        return la

    @property
    def loc(self) -> Loc:
        """Return the line,column of the last character returned by the iterator.

        If no character has been returned yet, it will be the location that
        this iterator was initialized with. The default is (1,0), which is
        the only time the column will be 0.
        """
        return Loc(self._line, self._col)
211
225
212
226
@@ -617,7 +631,7 @@ def _bubble_or_context(la: str, it: LocationIterator, *, context: bool = False)
617
631
bubble , final_token , la , bubble_loc = _raw_bubble (la , it , keywords )
618
632
if bubble is not None :
619
633
label_tokens , bubble , bubble_loc = _strip_bubble_label (bubble , bubble_loc )
620
- bubble , attr_tokens = _strip_bubble_attr (bubble )
634
+ bubble , attr_tokens = _strip_bubble_attr (bubble , bubble_loc )
621
635
622
636
tokens = label_tokens
623
637
if bubble :
@@ -723,23 +737,23 @@ def _strip_bubble_label(bubble: str, loc: Loc) -> tuple[list[Token], str, Loc]:
723
737
Token (':' , TokenType .COLON , colon_loc ),
724
738
],
725
739
match ['rest' ],
726
- loc + bubble [: match .start ('rest' )],
740
+ colon_loc + bubble [match . start ( 'colon' ) : match .start ('rest' )],
727
741
)
728
742
729
743
730
- def _strip_bubble_attr (bubble : str ) -> tuple [str , list [Token ]]:
744
+ def _strip_bubble_attr (bubble : str , loc : Loc ) -> tuple [str , list [Token ]]:
731
745
for i in range (len (bubble ) - 1 , - 1 , - 1 ):
732
746
if bubble [i ] != '[' :
733
747
continue
734
748
735
749
prefix = bubble [:i ]
736
- suffix = bubble [i :]
750
+ suffix = bubble [i + 1 :]
751
+ start_loc = loc + prefix
737
752
738
- it = iter (suffix )
739
- next (it ) # skip "["
753
+ it = LocationIterator (suffix , * start_loc )
740
754
la = next (it , '' )
741
755
742
- tokens = [Token ('[' , TokenType .LBRACK , INIT_LOC )]
756
+ tokens = [Token ('[' , TokenType .LBRACK , start_loc )]
743
757
attr_tokens = _attr (la , it )
744
758
try :
745
759
while True :
@@ -757,7 +771,7 @@ def _strip_bubble_attr(bubble: str) -> tuple[str, list[Token]]:
757
771
return bubble , []
758
772
759
773
760
- def _attr (la : str , it : Iterator [ str ] ) -> Generator [Token , None , str ]:
774
+ def _attr (la : str , it : LocationIterator ) -> Generator [Token , None , str ]:
761
775
la = _skip_ws_and_comments (la , it )
762
776
if not la :
763
777
raise _unexpected_character (la )
@@ -769,46 +783,48 @@ def _attr(la: str, it: Iterator[str]) -> Generator[Token, None, str]:
769
783
la = _skip_ws_and_comments (la , it )
770
784
771
785
if la == '(' : # TAG_STATE
772
- yield Token ('(' , TokenType .LPAREN , INIT_LOC )
786
+ yield Token ('(' , TokenType .LPAREN , it . loc )
773
787
la = next (it , '' )
788
+ loc = it .loc
774
789
775
790
if la == '"' :
776
791
text , token_type , la = _string (la , it )
777
- yield Token (text , token_type , INIT_LOC )
792
+ yield Token (text , token_type , loc )
778
793
else :
779
794
content , la = _attr_content (la , it )
780
795
if content :
781
796
# allows 'key()'
782
- yield Token (content , TokenType .ATTR_CONTENT , INIT_LOC )
797
+ yield Token (content , TokenType .ATTR_CONTENT , loc )
783
798
784
799
if la != ')' :
785
800
raise _unexpected_character (la )
786
801
787
- yield Token (')' , TokenType .RPAREN , INIT_LOC )
802
+ yield Token (')' , TokenType .RPAREN , it . loc )
788
803
789
804
la = next (it , '' )
790
805
la = _skip_ws_and_comments (la , it )
791
806
792
807
if la != ',' :
793
808
break
794
809
795
- yield Token (',' , TokenType .COMMA , INIT_LOC )
810
+ yield Token (',' , TokenType .COMMA , it . loc )
796
811
la = next (it , '' )
797
812
la = _skip_ws_and_comments (la , it )
798
813
799
814
if la != ']' :
800
815
raise _unexpected_character (la )
801
816
802
- yield Token (']' , TokenType .RBRACK , INIT_LOC )
817
+ yield Token (']' , TokenType .RBRACK , it . loc )
803
818
la = next (it , '' )
804
819
805
820
return la # noqa: B901
806
821
807
822
808
- def _attr_key (la : str , it : Iterator [ str ] ) -> tuple [Token , str ]:
823
+ def _attr_key (la : str , it : LocationIterator ) -> tuple [Token , str ]:
809
824
# ["a"-"z","1"-"9"](["A"-"Z", "a"-"z", "-", "0"-"9", "."])*("<" (["A"-"Z", "a"-"z", "-", "0"-"9"])+ ">")?
810
825
811
826
consumed : list [str ] = []
827
+ loc = it .loc
812
828
if la not in _LOWER and la not in _DIGIT :
813
829
raise _unexpected_character (la )
814
830
@@ -840,7 +856,7 @@ def _attr_key(la: str, it: Iterator[str]) -> tuple[Token, str]:
840
856
la = next (it , '' )
841
857
842
858
attr_key = '' .join (consumed )
843
- return Token (attr_key , TokenType .ATTR_KEY , INIT_LOC ), la
859
+ return Token (attr_key , TokenType .ATTR_KEY , loc ), la
844
860
845
861
846
862
_ATTR_CONTENT_FORBIDDEN : Final = {'' , '\n ' , '\r ' , '"' }
0 commit comments