Skip to content

Commit 55872da

Browse files
storage: use hash-stream-validation
1 parent 2b80071 commit 55872da

File tree

5 files changed

+86
-165
lines changed

5 files changed

+86
-165
lines changed

lib/storage/file.js

Lines changed: 63 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@
2222

2323
var bufferEqual = require('buffer-equal');
2424
var ConfigStore = require('configstore');
25-
var crc = require('sse4_crc32');
2625
var crypto = require('crypto');
2726
var duplexify = require('duplexify');
2827
var format = require('string-format-obj');
2928
var fs = require('fs');
29+
var hashStreamValidation = require('hash-stream-validation');
3030
var is = require('is');
3131
var once = require('once');
3232
var request = require('request').defaults({
@@ -430,35 +430,20 @@ File.prototype.createReadStream = function(options) {
430430
var tailRequest = options.end < 0;
431431
var throughStream = streamEvents(through());
432432

433-
var validations = ['crc32c', 'md5'];
434-
var validation;
435-
436-
// For data integrity, hash the contents of the stream as we receive it
437-
// from the server.
438-
var localCrcHash;
439-
var localMd5Hash = crypto.createHash('md5');
433+
var crc32c = false;
434+
var md5 = false;
440435

441436
if (is.string(options.validation)) {
442437
options.validation = options.validation.toLowerCase();
443-
444-
if (validations.indexOf(options.validation) > -1) {
445-
validation = options.validation;
446-
} else {
447-
validation = 'all';
448-
}
438+
crc32c = options.validation === 'crc32c';
439+
md5 = options.validation === 'md5';
449440
}
450441

451442
if (is.undef(options.validation)) {
452-
validation = 'all';
443+
crc32c = true;
444+
md5 = true;
453445
}
454446

455-
if (rangeRequest) {
456-
validation = false;
457-
}
458-
459-
var crc32c = validation === 'crc32c' || validation === 'all';
460-
var md5 = validation === 'md5' || validation === 'all';
461-
462447
var remoteFilePath = format('https://storage.googleapis.com/{b}/{o}', {
463448
b: this.bucket.name,
464449
o: encodeURIComponent(this.name)
@@ -476,36 +461,6 @@ File.prototype.createReadStream = function(options) {
476461

477462
makeAuthorizedReq(remoteFilePath);
478463

479-
// Calculate the hashes from the http.IncomingMessage response stream, which
480-
// will return the bytes from the source without decompressing gzip'd content.
481-
// The request stream will do the decompression so the user receives the
482-
// expected content.
483-
//
484-
// incomingMessage's end event will always occur before request's complete
485-
// event.
486-
throughStream.on('response', function(incomingMessage) {
487-
incomingMessage
488-
.on('data', function(chunk) {
489-
if (crc32c) {
490-
localCrcHash = crc.calculate(chunk, localCrcHash);
491-
}
492-
493-
if (md5) {
494-
localMd5Hash.update(chunk);
495-
}
496-
})
497-
498-
.on('end', function() {
499-
if (crc32c) {
500-
localCrcHash = new Buffer([localCrcHash]).toString('base64');
501-
}
502-
503-
if (md5) {
504-
localMd5Hash = localMd5Hash.digest('base64');
505-
}
506-
});
507-
});
508-
509464
return throughStream;
510465

511466
// Authenticate the request, then pipe the remote API request to the stream
@@ -537,19 +492,36 @@ File.prototype.createReadStream = function(options) {
537492
}
538493

539494
var requestStream = that.bucket.storage.makeAuthorizedRequest_(reqOpts);
495+
var validateStream;
496+
497+
function onHttpResponse(incomingMessage) {
498+
throughStream.emit('response', incomingMessage);
499+
500+
util.handleResp(null, incomingMessage, null, function(err) {
501+
if (err) {
502+
requestStream.unpipe(throughStream);
503+
return;
504+
}
505+
506+
if (!rangeRequest && (crc32c || md5)) {
507+
validateStream = hashStreamValidation({
508+
crc32c: crc32c,
509+
md5: md5
510+
});
511+
512+
// Calculate the hashes from the http.IncomingMessage response stream,
513+
// which will return the bytes from the source without decompressing
514+
// gzip'd content. The request stream will do the decompression so the
515+
// user receives the expected content.
516+
incomingMessage.pipe(validateStream).on('data', util.noop);
517+
}
518+
});
519+
}
540520

541521
requestStream
542522
.on('error', endThroughStream)
543523

544-
.on('response', function(incomingMessage) {
545-
throughStream.emit('response', incomingMessage);
546-
547-
util.handleResp(null, incomingMessage, null, function(err) {
548-
if (err) {
549-
requestStream.unpipe(throughStream);
550-
}
551-
});
552-
})
524+
.on('response', onHttpResponse)
553525

554526
.on('complete', function(res) {
555527
util.handleResp(null, res, null, function(err) {
@@ -564,31 +536,23 @@ File.prototype.createReadStream = function(options) {
564536
return;
565537
}
566538

567-
var failed = false;
568-
var crcFail = true;
569-
var md5Fail = true;
570-
571539
var hashes = {};
572540
res.headers['x-goog-hash'].split(',').forEach(function(hash) {
573541
var hashType = hash.split('=')[0].trim();
574542
hashes[hashType] = hash.substr(hash.indexOf('=') + 1);
575543
});
576544

577-
var remoteMd5 = hashes.md5;
578-
var remoteCrc = hashes.crc32c && hashes.crc32c.substr(4);
579-
580-
if (crc32c) {
581-
crcFail = localCrcHash !== remoteCrc;
582-
failed = crcFail;
583-
}
545+
var failed = false;
584546

585-
if (md5) {
586-
md5Fail = localMd5Hash !== remoteMd5;
587-
failed = md5Fail;
547+
if (crc32c && hashes.crc32c) {
548+
// We must remove the first four bytes from the returned checksum.
549+
// http://stackoverflow.com/questions/25096737/
550+
// base64-encoding-of-crc32c-long-value
551+
failed = !validateStream.test('crc32c', hashes.crc32c.substr(4));
588552
}
589553

590-
if (validation === 'all') {
591-
failed = remoteMd5 ? md5Fail : crcFail;
554+
if (md5 && hashes.md5) {
555+
failed = !validateStream.test('md5', hashes.md5);
592556
}
593557

594558
if (failed) {
@@ -600,9 +564,10 @@ File.prototype.createReadStream = function(options) {
600564
mismatchError.code = 'CONTENT_DOWNLOAD_MISMATCH';
601565

602566
endThroughStream(mismatchError, res);
603-
} else {
604-
endThroughStream(null, res);
567+
return;
605568
}
569+
570+
endThroughStream(null, res);
606571
});
607572
})
608573

@@ -717,63 +682,36 @@ File.prototype.createWriteStream = function(options) {
717682
metadata.contentEncoding = 'gzip';
718683
}
719684

720-
var validations = ['crc32c', 'md5'];
721-
var validation;
685+
var crc32c = false;
686+
var md5 = false;
722687

723688
if (is.string(options.validation)) {
724689
options.validation = options.validation.toLowerCase();
725-
726-
if (validations.indexOf(options.validation) > -1) {
727-
validation = options.validation;
728-
} else {
729-
validation = 'all';
730-
}
690+
crc32c = options.validation === 'crc32c';
691+
md5 = options.validation === 'md5';
731692
}
732693

733694
if (is.undef(options.validation)) {
734-
validation = 'all';
695+
crc32c = true;
696+
md5 = true;
735697
}
736698

737-
var crc32c = validation === 'crc32c' || validation === 'all';
738-
var md5 = validation === 'md5' || validation === 'all';
739-
740699
// Collect data as it comes in to store in a hash. This is compared to the
741700
// checksum value on the returned metadata from the API.
742-
var localCrc32cHash;
743-
var localMd5Hash = crypto.createHash('md5');
701+
var validateStream = hashStreamValidation({
702+
crc32c: crc32c,
703+
md5: md5
704+
});
744705

745706
var writableStream = streamEvents(duplexify());
746707

747708
var throughStream = through();
748709

749-
var validationStream = through(function(chunk, enc, next) {
750-
if (crc32c) {
751-
localCrc32cHash = crc.calculate(chunk, localCrc32cHash);
752-
}
753-
754-
if (md5) {
755-
localMd5Hash.update(chunk);
756-
}
757-
758-
this.push(chunk);
759-
next();
760-
});
761-
762-
validationStream.on('end', function() {
763-
if (crc32c) {
764-
localCrc32cHash = new Buffer([localCrc32cHash]).toString('base64');
765-
}
766-
767-
if (md5) {
768-
localMd5Hash = localMd5Hash.digest('base64');
769-
}
770-
});
771-
772710
throughStream
773711

774712
.pipe(gzip ? zlib.createGzip() : through())
775713

776-
.pipe(validationStream)
714+
.pipe(validateStream)
777715

778716
.pipe(writableStream)
779717

@@ -795,20 +733,15 @@ File.prototype.createWriteStream = function(options) {
795733
.on('complete', function(metadata) {
796734
var failed = false;
797735

798-
// We must remove the first four bytes from the returned checksum.
799-
// http://stackoverflow.com/questions/25096737/
800-
// base64-encoding-of-crc32c-long-value
736+
if (crc32c && metadata.crc32c) {
737+
// We must remove the first four bytes from the returned checksum.
738+
// http://stackoverflow.com/questions/25096737/
739+
// base64-encoding-of-crc32c-long-value
740+
failed = !validateStream.test('crc32c', metadata.crc32c.substr(4));
741+
}
801742

802-
if (validation === 'all') {
803-
if (metadata.md5Hash) {
804-
failed = localMd5Hash !== metadata.md5Hash;
805-
} else if (metadata.crc32c) {
806-
failed = localCrc32cHash !== metadata.crc32c.substr(4);
807-
}
808-
} else if (md5) {
809-
failed = localMd5Hash !== metadata.md5Hash;
810-
} else if (crc32c) {
811-
failed = localCrc32cHash !== metadata.crc32c.substr(4);
743+
if (md5 && metadata.md5Hash) {
744+
failed = !validateStream.test('md5', metadata.md5Hash);
812745
}
813746

814747
if (failed) {

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
"extend": "^2.0.0",
5959
"gce-images": "^0.1.0",
6060
"google-auto-auth": "^0.2.0",
61+
"hash-stream-validation": "^0.1.0",
6162
"is": "^3.0.1",
6263
"methmeth": "^1.0.0",
6364
"mime-types": "^2.0.8",
@@ -69,7 +70,6 @@
6970
"request": "^2.53.0",
7071
"retry-request": "^1.2.1",
7172
"split-array-stream": "^1.0.0",
72-
"sse4_crc32": "^3.1.0",
7373
"stream-events": "^1.0.1",
7474
"stream-forward": "^3.0.0",
7575
"string-format-obj": "^1.0.0",

test/bigquery/dataset.js

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@
1818

1919
'use strict';
2020

21-
// If we don't stub sse4_crc32 and use mockery, we get "Module did not self-
22-
// register".
23-
var crc = require('sse4_crc32');
24-
2521
var arrify = require('arrify');
2622
var assert = require('assert');
2723
var util = require('../../lib/common/util');
@@ -49,7 +45,11 @@ describe('BigQuery/Dataset', function() {
4945
var ds;
5046

5147
before(function() {
52-
mockery.registerMock('sse4_crc32', crc);
48+
// If we don't stub sse4_crc32 and use mockery, we get "Module did not self-
49+
// register".
50+
var crc32c = require('hash-stream-validation/node_modules/sse4_crc32');
51+
mockery.registerMock('sse4_crc32', crc32c);
52+
5353
mockery.registerMock('../common/stream-router.js', fakeStreamRouter);
5454
mockery.enable({
5555
useCleanCache: true,

0 commit comments

Comments
 (0)