-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhregexp.m
executable file
·112 lines (101 loc) · 3.36 KB
/
hregexp.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
function [Token,Match,Start] = hregexp(str,expr,varargin)
% hREGEXP : Run REGEXP/REGEXPI and return the matched strings and token
%           strings instead of the index outputs (finish and token extents).
%
% SYNTAX: [Token,Match,Start] = hregexp(str,expr,varargin)   % see REGEXP(str,expr,...)
%
%   str  = cell-array of strings; char-arrays are converted with CELLSTR.
%   expr = regular expression, see REGEXP().
%
%   Varargin can contain any or all of the following strings (case-insensitive):
%     'once'   : find first match only
%     'nocase' : ignore case (REGEXPI is used instead of REGEXP)
%     'cell'   : cell array output even if 1x1 (disables all dereferencing).
%   Any other option raises the error 'Invalid input no. N.', where N is the
%   position of the offending argument in the call.
%
%   Token{StrNo}{MatchNo,TokNo} == Tokens extracted from the matched strings.
%   Match{StrNo}{MatchNo}       == String that matched the regexp.
%   Start{StrNo}(1,MatchNo)     == Starting index of matches found.
%
%   Unless 'cell' is given:
%     - a per-string token cell that holds exactly one token is replaced by
%       that token string;
%     - when exactly one string was searched, the outer {StrNo} level of
%       Token, Match and Start is removed.
%   In all cases, if Token is empty at the very end, Match is returned in
%   its place.
%
% EXAMPLES:
%   [a,b,c] = hregexp({'qwer0987.rec234.rec','234ijhub23.rec'},'(\d+)\.(rec)')
%   [a,b,c] = hregexp('234ijhub23.rec1234.rec','(\d+)\.rec')
%
% AUTH: HM, 27.03.03, ver. 1a

%%% TODO:
% - Output all indices for matches and tokens found.

%%% PARAM:
DEREF  = 1;   % dereference 1x1-cells
ONCE   = 0;   % use REGEXP(...,'once')
NOCASE = 0;   % use REGEXPI() (ignore case)

% +++ PARSE OPTIONS:
for n = 1:numel(varargin)
    opt = varargin{n};
    % Check the type first so that a non-text option is reported with the
    % same message as an unknown option name.
    if ~(ischar(opt) || isa(opt,'string'))
        error(['Invalid input no. ',num2str(2+n),'.']);
    end
    switch lower(opt)
        case 'once'
            ONCE = 1;
        case 'cell'
            DEREF = 0;
        case 'nocase'
            NOCASE = 1;
        otherwise
            error(['Invalid input no. ',num2str(2+n),'.']);
    end
end

str = cellstr(str);   % Ensure str is a cellstring.

% +++ DO REGEXP():
if NOCASE
    regexfun = @regexpi;
else
    regexfun = @regexp;
end
if ONCE
    [Start,finish,token] = regexfun(str, expr, 'once');
else
    [Start,finish,token] = regexfun(str, expr);
end

% Make sure the output from regexp is a cell-array (one cell per string).
if isnumeric(Start)
    Start  = {Start};
    finish = {finish};
    token  = {token};
end

NoStr = numel(Start);    % Nof lines in str.
Match = cell(NoStr,1);   % Match{StrNo,1}{MatchNo} = matches found.
Token = cell(NoStr,1);   % Token{StrNo,1}{MatchNo,TokNo} = tokens for each match.

for StrNo = 1:NoStr      % FOR each string...
    thisStr = str{StrNo};

    % Cut every matched substring out of the string. Entries for strings
    % without any match are left as [] (they are never assigned).
    for MatchNo = 1:numel(Start{StrNo})
        Match{StrNo,1}{MatchNo} = ...
            thisStr(Start{StrNo}(MatchNo):finish{StrNo}(MatchNo));
    end

    % Token extents: a numeric matrix here (instead of a cell with one
    % matrix per match) is wrapped so both forms are handled by one loop.
    extents = token{StrNo};
    if isnumeric(extents)
        extents = {extents};
    end

    % NOTE(review): assumes every extent row is a valid [first last] index
    % pair into the string - confirm for optional groups that do not
    % take part in a match.
    for MatchNo = 1:numel(extents)            % FOR each match...
        for TokNo = 1:size(extents{MatchNo},1)  % FOR each token...
            Token{StrNo,1}{MatchNo,TokNo} = ...
                thisStr(extents{MatchNo}(TokNo,1):extents{MatchNo}(TokNo,2));
        end
    end

    if DEREF && numel(Token{StrNo,1}) == 1
        Token{StrNo,1} = Token{StrNo,1}{1};
    end
end

% Strip the outer {StrNo} level only when there is exactly one string.
% (An empty result has no first element to dereference.)
if DEREF && numel(Start) == 1
    Start = Start{1};
    Match = Match{1};
    Token = Token{1};
end

% If no tokens were extracted, return the match(es) as token.
if isempty(Token)
    Token = Match;
end