javascript - node js azure SDK getBlobToStream uses lots of memory -

I am writing a backup script that downloads the blobs in the blob containers of a specific Azure account.

The script uses async.js to make sure that only so many threads can run at the same time, so that it doesn't overload the server. When I run the script it works fine, but when it hits large files it runs out of memory. I'm guessing the download runs faster than the disk can write, and it eventually fills the in-memory buffer so badly that I run out of memory entirely, but debugging the exact cause has been impossible so far.

The specific function that appears to use a lot of memory is called as follows:

// Streams one blob straight into a file on disk, then records the blob's
// last-modified date next to it in "<file>.date".
// Excerpt: containername, blob, fullpath, bloblastmodified and callback come
// from the enclosing queue worker in the full script.
blobservice.getBlobToStream(
  containername,
  blob.name,
  fs.createWriteStream(fullpath),
  function (error) {
    if (error) {
      // something went wrong: write it to the console, finish the queue item and continue
      console.log("failed writing " + blob.name + " (" + error + ")");
      callback();
      return;
    }
    // Write the last modified date, then finish the queue item silently.
    // The Date is converted explicitly because fs.writeFile only accepts
    // strings/buffers on current Node versions.
    fs.writeFile(fullpath + ".date", String(bloblastmodified), function (err) {
      if (err) console.log("couldn't write .date file: " + err);
      callback();
    });
  }
);

Even a single 700 MB download will fill 1 GB of memory on my side.

Is there a way around this? Am I missing a parameter that magically prevents the Azure SDK from buffering everything and the kitchen sink?

full code:

#!/usr/bin/env node

// Backup script: lists every container of the storage account, lists the
// blobs of each container and downloads every blob below backupprefix.
// A "<file>.date" marker stores the blob's last-modified date so that
// unchanged blobs are skipped on the next run.

//requires
var azure = require('azure');
var fs = require('fs');
var mkdirp = require('mkdirp');
var path = require('path');
var async = require('async');

// max amount of blob downloads running at the same time (per container) through async.js
var maxconcurrency = 1;

var blobservice = azure.createBlobService();

var backupprefix = '/backups/azurebackup/'; // always end with a '/'!!

// Main flow of the script is near the bottom of the file.

// Queue of container names, processed one container at a time.
// The worker must call containerdone() once the container is finished;
// otherwise async.queue never starts the next container.
var containerprocessingqueue = async.queue(
  function getblobsandsavethem(containername, containerdone) {
    console.log(containername); //debug
    blobservice.listBlobs(containername, function (error, blobs) {
      if (error) {
        console.log("an error occurred listing blobs: " + error);
        containerdone();
        return;
      }

      // Number of blobs of this container that have not finished yet.
      var remaining = blobs.length;
      if (remaining === 0) {
        containerdone();
        return;
      }

      // Worker: downloads blobs[index] unless its .date marker shows it is unmodified.
      var blobprocessingqueue = async.queue(function (index, callback) {
        var blob = blobs[index];
        console.log(blob); //debug
        var fullpath = backupprefix + containername + '/' + blob.name;
        var datepath = fullpath + ".date";
        var targetdir = path.dirname(fullpath);
        var bloblastmodified = new Date(blob.properties['last-modified']);

        // Only create the directory if it doesn't exist yet. This is done
        // synchronously so concurrent workers do not race each other while
        // checking for and creating the same directory.
        if (!fs.existsSync(targetdir)) {
          try {
            mkdirp.sync(targetdir);
          } catch (err) {
            // mkdirp.sync reports failures by throwing, not through a callback.
            console.log('failed create directory ' + targetdir + " (" + err + ")");
            callback();
            return;
          }
        }

        if (fs.existsSync(datepath)) {
          if (String(bloblastmodified) === fs.readFileSync(datepath).toString()) {
            // The file is unmodified: finish this queue item. The return only
            // leaves this worker function, it does not exit the program.
            callback();
            return;
          }
        }

        // NOTE(review): this call is reported to use a lot of memory for large
        // blobs; downloading in byte ranges (rangeStartHeader / rangeEndHeader
        // options) is a possible workaround -- confirm against the SDK.
        blobservice.getBlobToStream(
          containername,
          blob.name,
          fs.createWriteStream(fullpath),
          function (error) {
            if (error) {
              // something went wrong: write it to the console, finish the queue item and continue
              console.log("failed writing " + blob.name + " (" + error + ")");
              callback();
              return;
            }
            // Write the last modified date, then finish the queue item silently.
            fs.writeFile(datepath, String(bloblastmodified), function (err) {
              if (err) console.log("couldn't write .date file: " + err);
              callback();
            });
          }
        );
      }, maxconcurrency);

      // Push every blob index onto the queue; the per-task callback counts
      // completions so the container is reported done after its last blob.
      for (var blobindex = 0; blobindex < blobs.length; blobindex++) {
        blobprocessingqueue.push(blobindex, function () {
          remaining -= 1;
          if (remaining === 0) {
            containerdone();
          }
        });
      }
    });
  }, 1);

blobservice.listContainers(function (err, result) {
  if (err) {
    console.log("an error occurred listing containers: " + err);
    return;
  }
  for (var i = 0; i < result.length; i++) {
    containerprocessingqueue.push(result[i].name);
  }
});

One thing you could possibly do is read a chunk of data into the stream instead of the whole blob data, append that to the file, and then read the next chunk. The Blob Storage service supports that. If you look at the source code for getBlobToStream (https://github.com/windowsazure/azure-sdk-for-node/blob/master/lib/services/blob/blobservice.js), you can specify from/to bytes in the options - rangeStartHeader and rangeEndHeader. See if that helps.

I have hacked together some code that does that (as you can see from my code, my knowledge of Node.js is quite primitive :)). [Please use this code only to get an idea of how you can do a chunked download, as I think it still has some glitches.]

// Downloads a single blob in fixed-size byte ranges so that only one chunk
// is requested at a time, appending each chunk to the target file.
var azure = require('azure');
var fs = require('fs');

var blobservice = azure.createBlobService("account", "accountkey");
var containername = "container name";
var blobname = "blob name";
var blobsize;
var chunksize = 1024 * 512; // chunk size -- we'll read 512 KB at a time.
var startpos = 0;
var fullpath = "d:\\node\\";

// Fetch the blob's size first, then start the chunked download.
blobservice.getBlobProperties(containername, blobname, null, function (error, blob) {
  if (error) {
    throw error;
  }
  // parseInt copes with the length arriving as a number or as a header string.
  blobsize = parseInt(blob.contentLength, 10);
  fullpath = fullpath + blobname;
  console.log(fullpath);
  if (blobsize === 0) {
    // Nothing to download (a 0..-1 range would be invalid); just create an empty file.
    fs.writeFileSync(fullpath, '');
    return;
  }
  dodownload();
});

// Downloads the chunk starting at startpos and calls itself again until the
// whole blob has been written.
function dodownload() {
  // The first chunk truncates any file left over from an earlier run;
  // later chunks are appended.
  var stream = fs.createWriteStream(fullpath, { flags: startpos === 0 ? 'w' : 'a' });
  var endpos = startpos + chunksize;
  if (endpos > blobsize) {
    endpos = blobsize;
  }
  console.log("downloading " + (endpos - startpos) + " bytes starting " + startpos + " marker.");
  // The range end is inclusive, hence endpos - 1.
  blobservice.getBlobToStream(containername, blobname, stream,
    { "rangeStartHeader": startpos, "rangeEndHeader": endpos - 1 }, function (error) {
      if (error) {
        throw error;
      }
      startpos = endpos;
      if (startpos <= blobsize - 1) {
        dodownload();
      }
    });
}

Search This Blog

Bradly

javascript - node js azure SDK getBlobToStream uses lots of memory -

Comments

Post a Comment

Popular posts from this blog

iphone - Three second countdown in cocos2d -

hyperlink - how to do url routing in php -

c - Avoiding Extra Malloc in Linked List (node->next = NULL) -