JavaScript - Node.js Azure SDK getBlobToStream uses lots of memory
I am writing a backup script that downloads all the blobs in all the blob containers of a specific Azure account.
The script uses async.js to make sure that only so many threads can run at the same time, so that it doesn't overload the server. When I run the script it works fine, but when it hits large files it runs out of memory. I'm guessing the download runs faster than the disk can write, and it eventually fills up the in-memory buffer so badly that I run out of memory entirely, but debugging the exact cause has been impossible so far.
The specific function that appears to use a lot of memory is called as follows:
blobservice.getblobtostream( containername, blob.name, fs.createwritestream(fullpath), function(error) { if(error){ //something went wrong, write console finish queue item , continue. console.log("failed writing " + blob.name + " (" + error + ")"); callback(); } else if(!error) { //write last modified date , finish queue item silently fs.writefile(fullpath + ".date", bloblastmodified, function(err) { if(err) console.log("couldn't write .date file: " + err); }); callback(); } });
Even a single 700 MB download will easily fill 1 GB of memory on my side.
Is there any way around this? Am I missing a parameter that magically prevents the Azure SDK from buffering everything and the kitchen sink?
Full code:
#!/usr/bin/env node
// Backup script: downloads every blob of every container in the configured
// Azure storage account below `backupPrefix`, mirroring the layout
// <backupPrefix>/<container>/<blob name>. Next to each downloaded file a
// "<file>.date" marker stores the blob's last-modified timestamp so that
// unchanged blobs are skipped on the next run.

//requires
var azure = require('azure');
var fs = require('fs');
var mkdirp = require('mkdirp');
var path = require('path');
var async = require('async');

// Max number of blob downloads running simultaneously within one container.
var maxConcurrency = 1;
var blobService = azure.createBlobService();
var backupPrefix = '/backups/azurebackup/'; //always end with '/'!!

//main flow of the script is near the bottom of the file.

/**
 * Downloads a single blob to disk unless the local copy is already current.
 *
 * Never reports an error to `callback`: failures are logged and the queue
 * moves on, so that one bad blob does not abort the whole backup.
 *
 * @param {string} containerName - Container the blob lives in.
 * @param {Object} blob - Blob entry as returned by `listBlobs`; `blob.name`
 *   and `blob.properties['last-modified']` are read.
 * @param {Function} callback - Invoked (without arguments) once this blob
 *   has been handled, successfully or not.
 */
function backupBlob(containerName, blob, callback) {
  var fullPath = backupPrefix + containerName + '/' + blob.name;
  var dateFile = fullPath + ".date";
  var directory = path.dirname(fullPath);
  // Stringified once so the value written to the marker file and the value
  // compared against it on the next run are guaranteed to be the same text.
  var blobLastModified = String(new Date(blob.properties['last-modified']));

  // Done synchronously so that concurrent workers don't race to create the
  // same directory.
  if (!fs.existsSync(directory)) {
    try {
      // mkdirp.sync takes no callback; it reports failure by throwing.
      mkdirp.sync(directory);
    } catch (err) {
      console.log('failed create directory ' + directory + " (" + err + ")");
      callback();
      return;
    }
  }

  // If the marker matches the blob's timestamp the file is unmodified: skip.
  if (fs.existsSync(dateFile) &&
      fs.readFileSync(dateFile).toString() === blobLastModified) {
    callback();
    return;
  }

  // NOTE(review): the question this script comes from reports that
  // getBlobToStream uses a lot of memory on large blobs; if that is still the
  // case, a ranged download (rangeStartHeader/rangeEndHeader options) is the
  // suggested workaround -- confirm against the SDK version in use.
  blobService.getBlobToStream(
    containerName,
    blob.name,
    fs.createWriteStream(fullPath),
    function (error) {
      if (error) {
        //something went wrong, write to console but finish the queue item and continue.
        console.log("failed writing " + blob.name + " (" + error + ")");
        callback();
        return;
      }
      //write the last modified date, then finish the queue item.
      fs.writeFile(dateFile, blobLastModified, function (err) {
        if (err) console.log("couldn't write .date file: " + err);
        callback();
      });
    }
  );
}

/**
 * Queue worker: lists all blobs of one container and backs them up with at
 * most `maxConcurrency` downloads in flight.
 *
 * @param {string} containerName - Name of the container to back up.
 * @param {Function} containerDone - Queue callback; must be invoked exactly
 *   once so the container queue can move on to the next container.
 */
function getBlobsAndSaveThem(containerName, containerDone) {
  console.log(containerName); //debug
  blobService.listBlobs(containerName, function (error, blobs) {
    if (error) {
      console.log("an error occurred listing blobs: " + error);
      containerDone();
      return;
    }
    if (!blobs || blobs.length === 0) {
      containerDone();
      return;
    }

    var blobProcessingQueue = async.queue(function (blob, callback) {
      console.log(blob); //debug
      backupBlob(containerName, blob, callback);
    }, maxConcurrency);

    // Count completions through the per-task push callback instead of a
    // queue-level drain hook, so the container is released exactly when its
    // last blob has been handled.
    var remaining = blobs.length;
    blobs.forEach(function (blob) {
      blobProcessingQueue.push(blob, function () {
        remaining -= 1;
        if (remaining === 0) containerDone();
      });
    });
  });
}

// Containers are processed one at a time.
var containerProcessingQueue = async.queue(getBlobsAndSaveThem, 1);

blobService.listContainers(function (err, result) {
  if (err) {
    console.log("an error occurred listing containers: " + err);
    return;
  }
  for (var i = 0; i < result.length; i++) {
    containerProcessingQueue.push(result[i].name);
  }
});
One thing you could possibly do is read only a chunk of data into the stream instead of the whole blob data, append that to the file, and then read the next chunk. The Blob Storage service supports that. If you look at the source code for getBlobToStream (https://github.com/windowsazure/azure-sdk-for-node/blob/master/lib/services/blob/blobservice.js), you can specify from/to bytes in the options - rangeStartHeader and rangeEndHeader. See if that helps.
I have hacked together some code which does that (as you can see from my code, my knowledge of Node.js is quite primitive :)). [Please use this code only to get an idea of how you can do a chunked download, as I think it still has some glitches.]
// Sample: download one blob in fixed-size ranges, appending each range to a
// local file, instead of requesting the whole blob in a single call.
var azure = require('azure');
var fs = require('fs');

var blobService = azure.createBlobService("account", "accountkey");
var containerName = "container name";
var blobName = "blob name";
var blobSize; // total blob length in bytes, set once the properties arrive
var chunkSize = 1024 * 512; // chunk size -- we'll read 512 KB at a time.
var startPos = 0; // offset of the next byte to download
var fullPath = "d:\\node\\";

blobService.getBlobProperties(containerName, blobName, null, function (error, blob) {
  if (error) {
    throw error;
  }
  // Coerced defensively: all range arithmetic below must be numeric.
  blobSize = Number(blob.contentLength);
  fullPath = fullPath + blobName;
  console.log(fullPath);

  if (blobSize === 0) {
    // An empty blob has no valid byte range to request (it would be 0..-1),
    // so just create the empty file.
    fs.writeFile(fullPath, "", function (err) {
      if (err) {
        throw err;
      }
    });
    return;
  }
  doDownload();
});

/**
 * Downloads the next chunk of the blob, starting at `startPos`, writes it to
 * `fullPath`, and schedules itself again until the whole blob is written.
 * Chunks are fetched strictly one after another.
 *
 * Throws (from the SDK callback) if a chunk download fails.
 */
function doDownload() {
  // The first chunk truncates any stale file left by a previous run; later
  // chunks append to what has been written so far.
  var stream = fs.createWriteStream(fullPath, { flags: startPos === 0 ? 'w' : 'a' });
  var endPos = Math.min(startPos + chunkSize, blobSize);
  console.log("downloading " + (endPos - startPos) + " bytes starting " + startPos + " marker.");
  blobService.getBlobToStream(
    containerName,
    blobName,
    stream,
    // The range end is inclusive, hence endPos - 1.
    { "rangeStartHeader": startPos, "rangeEndHeader": endPos - 1 },
    function (error) {
      if (error) {
        throw error;
      }
      startPos = endPos;
      if (startPos < blobSize) {
        doDownload();
      }
    }
  );
}
Comments
Post a Comment