Simplify calculating hash from file with chunked-file-reader #49

Open · wants to merge 1 commit into base: master
51 changes: 18 additions & 33 deletions README.md
@@ -48,44 +48,29 @@ var rawHash = spark.end(true); // OR raw hash (binary string)
 
 ### Hash a file incrementally
 
+If you want to calculate an MD5 hash of a file, it's recommended to read the
+file in chunks and calculate the hash incrementally. For reading a file in
+chunks you can use the [chunked-file-reader](https://www.npmjs.com/package/chunked-file-reader)
+package.
+
 NOTE: If you test the code below using the file:// protocol in Chrome, you must start the browser with the --allow-file-access-from-files argument.
 Please see: http://code.google.com/p/chromium/issues/detail?id=60889
 
 ```js
 document.getElementById('file').addEventListener('change', function () {
-    var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
-        file = this.files[0],
-        chunkSize = 2097152, // Read in chunks of 2MB
-        chunks = Math.ceil(file.size / chunkSize),
-        currentChunk = 0,
-        spark = new SparkMD5.ArrayBuffer(),
-        fileReader = new FileReader();
-
-    fileReader.onload = function (e) {
-        console.log('read chunk nr', currentChunk + 1, 'of', chunks);
-        spark.append(e.target.result); // Append array buffer
-        currentChunk++;
-
-        if (currentChunk < chunks) {
-            loadNext();
-        } else {
-            console.log('finished loading');
-            console.info('computed hash', spark.end()); // Compute hash
-        }
-    };
-
-    fileReader.onerror = function () {
-        console.warn('oops, something went wrong.');
-    };
-
-    function loadNext() {
-        var start = currentChunk * chunkSize,
-            end = ((start + chunkSize) >= file.size) ? file.size : start + chunkSize;
-
-        fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
-    }
-
-    loadNext();
+    var file = this.files[0],
+        spark = new SparkMD5.ArrayBuffer(),
+        reader = new ChunkedFileReader({ maxChunkSize: 2*1024*1024 }); // https://www.npmjs.com/package/chunked-file-reader
+
+    reader.subscribe('chunk', function (e) {
+        spark.append(e.chunk);
+    });
+
+    reader.subscribe('end', function (e) {
+        console.info('computed hash', spark.end());
+    });
+
+    reader.readChunks(file);
 });
 ```
 
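The bundled reader (see test/js/chunked-file-reader-0.0.3.js later in this diff) also publishes `begin` and `progress` events, so the README example extends naturally to progress reporting. A minimal sketch, assuming the event payloads of chunked-file-reader 0.0.3 as vendored in this PR (`done_ratio` is the fraction of chunks read so far):

```js
var spark = new SparkMD5.ArrayBuffer(),
    reader = new ChunkedFileReader({ maxChunkSize: 2*1024*1024 });

reader.subscribe('progress', function (e) {
    // e.done of e.nchunks chunks have been read; e.done_ratio is the fraction.
    console.log('hashing: ' + Math.round(e.done_ratio * 100) + '%');
});

reader.subscribe('chunk', function (e) {
    spark.append(e.chunk); // e.chunk is an ArrayBuffer
});

reader.subscribe('end', function () {
    console.info('computed hash', spark.end());
});

reader.readChunks(document.getElementById('file').files[0]);
```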
50 changes: 17 additions & 33 deletions test/file_reader.html
@@ -5,6 +5,7 @@
     <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
     <link rel="stylesheet" href="css/bootstrap-1.4.min.css">
     <script src="../spark-md5.js" type="text/javascript"></script>
+    <script src="js/chunked-file-reader-0.0.3.js" type="text/javascript"></script>
 
     <style type="text/css" media="screen">
     .alert-message {
@@ -40,8 +41,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
 <div id="log"></div>
 
 <script type="text/javascript">
-    var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
-        log = document.getElementById('log'),
+    var log = document.getElementById('log'),
         input = document.getElementById('file'),
         running = false,
         ua = navigator.userAgent.toLowerCase();
@@ -64,20 +64,17 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
             return;
         }
 
-        var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
-            file = input.files[0],
-            chunkSize = 2097152, // read in chunks of 2MB
-            chunks = Math.ceil(file.size / chunkSize),
-            currentChunk = 0,
+        var file = input.files[0],
             spark = new SparkMD5.ArrayBuffer(),
+            currentChunk = 0,
             time,
             uniqueId = 'chunk_' + (new Date().getTime()),
             chunkId = null,
-            fileReader = new FileReader();
+            reader = new ChunkedFileReader({ maxChunkSize: 2*1024*1024 });
 
-        fileReader.onload = function (e) {
+        reader.subscribe('chunk', function (e) {
             if (currentChunk === 0) {
-                registerLog('Read chunk number <strong id="' + uniqueId + '">' + (currentChunk + 1) + '</strong> of <strong>' + chunks + '</strong><br/>', 'info');
+                registerLog('Read chunk number <strong id="' + uniqueId + '">' + (currentChunk + 1) + '</strong><br/>', 'info');
             } else {
                 if (chunkId === null) {
                     chunkId = document.getElementById(uniqueId);
@@ -86,35 +83,22 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
                 chunkId.innerHTML = currentChunk + 1;
             }
 
-            spark.append(e.target.result); // append array buffer
-            currentChunk += 1;
+            spark.append(e.chunk); // append array buffer
 
-            if (currentChunk < chunks) {
-                loadNext();
-            } else {
-                running = false;
-                registerLog('<strong>Finished loading!</strong><br/>', 'success');
-                registerLog('<strong>Computed hash:</strong> ' + spark.end() + '<br/>', 'success'); // compute hash
-                registerLog('<strong>Total time:</strong> ' + (new Date().getTime() - time) + 'ms<br/>', 'success');
-            }
-        };
+            currentChunk += 1;
+        });
 
-        fileReader.onerror = function () {
+        reader.subscribe('end', function (e) {
             running = false;
-            registerLog('<strong>Oops, something went wrong.</strong>', 'error');
-        };
-
-        function loadNext() {
-            var start = currentChunk * chunkSize,
-                end = start + chunkSize >= file.size ? file.size : start + chunkSize;
-
-            fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
-        }
+            registerLog('<strong>Finished loading!</strong><br/>', 'success');
+            registerLog('<strong>Computed hash:</strong> ' + spark.end() + '<br/>', 'success'); // compute hash
+            registerLog('<strong>Total time:</strong> ' + (new Date().getTime() - time) + 'ms<br/>', 'success');
+        });
 
         running = true;
         registerLog('<p></p><strong>Starting incremental test (' + file.name + ')</strong><br/>', 'info');
         time = new Date().getTime();
-        loadNext();
+        reader.readChunks(file);
     }
 
     function doNormalTest() {
@@ -160,7 +144,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
         }
     }
 
-    if (!('FileReader' in window) || !('File' in window) || !blobSlice) {
+    if (!('FileReader' in window) || !('File' in window) || !File.prototype.slice) {
        registerLog('<p><strong>Your browser does not support the FileAPI or slicing of files.</strong></p>', 'error');
     } else {
        registerLog('Keep your devtools closed otherwise this example will be a LOT slower', 'info');
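The support check above now tests `File.prototype.slice` directly, which matches the reader's behavior: chunked-file-reader 0.0.3 slices with an unprefixed `input.slice(...)`, so browsers that only expose `webkitSlice`/`mozSlice` would fail inside the reader anyway. If prefixed implementations still mattered, a guard along these lines could be used (a hypothetical sketch, not part of this PR):

```js
// Hypothetical: also accept prefixed slice implementations.
var sliceSupported = !!(File.prototype.slice ||
                        File.prototype.webkitSlice ||
                        File.prototype.mozSlice);

if (!('FileReader' in window) || !('File' in window) || !sliceSupported) {
    registerLog('<p><strong>Your browser does not support the FileAPI or slicing of files.</strong></p>', 'error');
}
```

Note that the vendored reader would also need to call the prefixed method for this fallback to actually help.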
11 changes: 5 additions & 6 deletions test/file_reader_binary.html
@@ -5,6 +5,7 @@
     <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
     <link rel="stylesheet" href="css/bootstrap-1.4.min.css">
     <script src="../spark-md5.js" type="text/javascript"></script>
+    <script src="js/chunked-file-reader-0.0.3.js" type="text/javascript"></script>
 
     <style type="text/css" media="screen">
     .alert-message {
@@ -40,8 +41,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
 <div id="log"></div>
 
 <script type="text/javascript">
-    var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
-        log = document.getElementById('log'),
+    var log = document.getElementById('log'),
         input = document.getElementById('file'),
         running = false,
         ua = navigator.userAgent.toLowerCase();
@@ -64,8 +64,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
             return;
         }
 
-        var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
-            file = input.files[0],
+        var file = input.files[0],
             chunkSize = 2097152, // read in chunks of 2MB
             chunks = Math.ceil(file.size / chunkSize),
             currentChunk = 0,
@@ -108,7 +107,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
             var start = currentChunk * chunkSize,
                 end = start + chunkSize >= file.size ? file.size : start + chunkSize;
 
-            fileReader.readAsBinaryString(blobSlice.call(file, start, end));
+            fileReader.readAsBinaryString(file.slice(start, end));
         }
 
         running = true;
@@ -160,7 +159,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
         }
     }
 
-    if (!('FileReader' in window) || !('File' in window) || !blobSlice) {
+    if (!('FileReader' in window) || !('File' in window) || !File.prototype.slice) {
        registerLog('<p><strong>Your browser does not support the FileAPI or slicing of files.</strong></p>', 'error');
     } else {
        registerLog('Keep your devtools closed otherwise this example will be a LOT slower', 'info');
130 changes: 130 additions & 0 deletions test/js/chunked-file-reader-0.0.3.js
@@ -0,0 +1,130 @@
(function(root, factory){
if(typeof define === 'function' && define.amd)
{
define(factory);
}
else if(typeof exports === 'object' && typeof module != 'undefined')
{
module.exports= factory();
}
else
{
this.ChunkedFileReader= factory();
}
}(this, function(){
'use strict';

/**
* Create a new instance of ChunkedFileReader.
*
* @class ChunkedFileReader
* @constructor
* @param opts {object} The options.
* Valid options are:
* maxChunkSize - Maximum chunk size
*/
var ChunkedFileReader= function(opts){
opts || (opts= {});

this.maxChunkSize= (opts.maxChunkSize || 256 * 1024);
this.listeners= {};
};

/**
* Subscribe to an event.
*
* @method subscribe
* @param eventName {string} The event name to subscribe to
* @param listener {function} The listener function to be invoked on events
* @param thisObj {any} The `this` object to be used when invoking the listener function
*/
ChunkedFileReader.prototype.subscribe= function(eventName, listener, thisObj){
this.listeners[eventName]= (this.listeners[eventName] || []);
this.listeners[eventName].push({
ctx: thisObj,
fun: listener
});
};

/**
* **Internal use**
*
* @method publish
* @param eventName {string} The event name
* @param eventArgs {object} The event args to be passed to each listener
*/
ChunkedFileReader.prototype.publish= function(eventName, eventArgs){
(this.listeners[eventName] || []).forEach(function(listener){
listener.fun.call(listener.ctx, eventArgs);
}, this);
};

/**
* Read chunks from File object.
*
* It produces some events:<br>
* <ul>
* <li>"begin" - Emitted when file reading starts.</li>
* <li>"progress" - Emitted when reading progress changes.</li>
* <li>"chunk" - Emitted when a chunk has been read.</li>
* <li>"end" - Emitted when reading has finished.</li>
* </ul>
*
* @method readChunks
* @param input {blob} The Blob (File) object
*/
ChunkedFileReader.prototype.readChunks= function(input){
var chunkSize= Math.min(this.maxChunkSize, input.size);
var remainingBytes= input.size;
var nchunks= Math.ceil(remainingBytes / chunkSize);

var pos= 0;
var reader= new FileReader(); // the FileReader constructor takes no arguments
var seq= 1;
var that= this;
reader.onloadend= function(evt){
if(evt.target.readyState !== FileReader.DONE)
{
return;
}

that.publish('progress', {
nchunks: nchunks,
done: seq,
done_ratio: (seq / nchunks)
});
that.publish('chunk', {
seq: seq,
nchunks: nchunks,
chunk: evt.target.result
});
++seq;

pos+= chunkSize;
remainingBytes-= chunkSize;
if(remainingBytes < chunkSize)
{
chunkSize= remainingBytes;
}
if(remainingBytes > 0)
{
reader.readAsArrayBuffer(input.slice(pos, pos + chunkSize));
}
else
{
that.publish('end', {
nchunks: nchunks
});
}
};

this.publish('begin', {
nchunks: nchunks
});

reader.readAsArrayBuffer(input.slice(pos, pos + chunkSize));
};
return ChunkedFileReader;
}));
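The API surface above is small: construct with an optional `maxChunkSize` (256KB by default), `subscribe` to events, then call `readChunks`. A usage sketch against the 0.0.3 source shown above; the `logger` object is illustrative and demonstrates the optional `thisObj` argument of `subscribe`:

```js
var reader = new ChunkedFileReader({ maxChunkSize: 512 * 1024 }); // 512KB chunks

// The third argument of subscribe() becomes `this` inside the listener.
var logger = {
    prefix: '[reader] ',
    log: function (msg) { console.log(this.prefix + msg); }
};

reader.subscribe('begin', function (e) {
    this.log('starting, expecting ' + e.nchunks + ' chunk(s)');
}, logger);

reader.subscribe('chunk', function (e) {
    this.log('chunk ' + e.seq + ' of ' + e.nchunks + ' (' + e.chunk.byteLength + ' bytes)');
}, logger);

reader.subscribe('end', function (e) {
    this.log('done after ' + e.nchunks + ' chunk(s)');
}, logger);

reader.readChunks(document.getElementById('file').files[0]);
```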
41 changes: 11 additions & 30 deletions test/readme_example.html
@@ -4,45 +4,26 @@
     <title>SparkMD5 readme example</title>
     <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
     <script src="../spark-md5.js"></script>
+    <script src="js/chunked-file-reader-0.0.3.js" type="text/javascript"></script>
 </head>
 <body onload="init()">
 <input type="file" id="file" />
 <script>
     function init() {
         document.getElementById('file').addEventListener('change', function () {
-            var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
-                file = this.files[0],
-                chunkSize = 2097152, // Read in chunks of 2MB
-                chunks = Math.ceil(file.size / chunkSize),
-                currentChunk = 0,
-                spark = new SparkMD5.ArrayBuffer(),
-                fileReader = new FileReader();
+            var file = this.files[0],
+                spark = new SparkMD5.ArrayBuffer(),
+                reader = new ChunkedFileReader({ maxChunkSize: 2*1024*1024 }); // https://www.npmjs.com/package/chunked-file-reader
 
-            fileReader.onload = function (e) {
-                console.log('read chunk nr', currentChunk + 1, 'of', chunks);
-                spark.append(e.target.result); // Append array buffer
-                currentChunk++;
+            reader.subscribe('chunk', function (e) {
+                spark.append(e.chunk);
+            });
 
-                if (currentChunk < chunks) {
-                    loadNext();
-                } else {
-                    console.log('finished loading');
-                    console.info('computed hash', spark.end()); // Compute hash
-                }
-            };
+            reader.subscribe('end', function (e) {
+                console.info('computed hash', spark.end());
+            });
 
-            fileReader.onerror = function () {
-                console.warn('oops, something went wrong.');
-            };
-
-            function loadNext() {
-                var start = currentChunk * chunkSize,
-                    end = ((start + chunkSize) >= file.size) ? file.size : start + chunkSize;
-
-                fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
-            }
-
-            loadNext();
+            reader.readChunks(file);
         });
     }
 </script>