1
- /****************************************************************
2
- * Licensed to the Apache Software Foundation (ASF) under one *
3
- * or more contributor license agreements. See the NOTICE file *
4
- * distributed with this work for additional information *
5
- * regarding copyright ownership. The ASF licenses this file *
6
- * to you under the Apache License, Version 2.0 (the *
7
- * "License"); you may not use this file except in compliance *
8
- * with the License. You may obtain a copy of the License at *
9
- * *
10
- * http://www.apache.org/licenses/LICENSE-2.0 *
11
- * *
12
- * Unless required by applicable law or agreed to in writing, *
13
- * software distributed under the License is distributed on an *
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
15
- * KIND, either express or implied. See the License for the *
16
- * specific language governing permissions and limitations *
17
- * under the License. *
18
- ****************************************************************/
1
+ /**
2
+ * **************************************************************
3
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to you
5
+ * under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You
6
+ * may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or
7
+ * agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
8
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and
9
+ * limitations under the License. **************************************************************
10
+ */
19
11
package ro .ieugen .mboxiterator ;
20
12
21
13
import java .io .*;
24
16
import java .nio .channels .FileChannel ;
25
17
import java .nio .charset .Charset ;
26
18
import java .nio .charset .CharsetDecoder ;
19
+ import java .nio .charset .CoderResult ;
27
20
import java .util .Iterator ;
28
21
import java .util .regex .Matcher ;
29
22
import java .util .regex .Pattern ;
38
31
public class MboxIterator implements Iterable <CharBuffer >, Closeable {
39
32
40
33
private static final Logger LOG = LoggerFactory .getLogger (MboxIterator .class );
34
+ private static final int MAX_MSG_LENGTH = 1024 * 1024 * 10 ; // 10Mb of Chars!
41
35
private final FileInputStream fis ;
42
36
private final CharBuffer mboxCharBuffer ;
43
37
private final Matcher fromLineMathcer ;
@@ -56,11 +50,9 @@ private MboxIterator(final File mbox,
56
50
final MappedByteBuffer byteBuffer = fileChannel .map (FileChannel .MapMode .READ_ONLY , 0 ,
57
51
fileChannel .size ());
58
52
final CharsetDecoder DECODER = Charset .forName (charset ).newDecoder ();
59
- /*TODO: DECODER.decode() this will try to decode the whole file.
60
- * It could be problematic if the file is large (~2gb).
61
- * Improve this by working with chunks.
62
- */
63
- mboxCharBuffer = DECODER .decode (byteBuffer );
53
+ mboxCharBuffer = CharBuffer .allocate (MAX_MSG_LENGTH );
54
+ CoderResult result = DECODER .decode (byteBuffer , mboxCharBuffer , false );
55
+
64
56
final Pattern MESSAGE_START = Pattern .compile (regexpPattern , regexpFlags );
65
57
fromLineMathcer = MESSAGE_START .matcher (mboxCharBuffer );
66
58
hasMore = fromLineMathcer .find ();
@@ -95,10 +87,10 @@ public boolean hasNext() {
95
87
96
88
@ Override
97
89
public CharBuffer next () {
98
- LOG .info ("next() called at offset {}" , fromLineMathcer .start ());
90
+ // LOG.info("next() called at offset {}", fromLineMathcer.start());
99
91
final CharBuffer message = mboxCharBuffer .slice ();
100
92
message .position (fromLineMathcer .start ());
101
- logBufferDetails (message );
93
+ // logBufferDetails(message);
102
94
hasMore = fromLineMathcer .find ();
103
95
if (hasMore ) {
104
96
LOG .info ("We limit the buffer to {} ?? {}" ,
0 commit comments