Sunday, 15 September 2013

(node) warning: possible EventEmitter memory leak detected

(node) warning: possible EventEmitter memory leak detected

I wrote a little scraper script built on top of cheerio.js and request.js
to get contact information (URLs and emails) off of a booking agency's
site. While it does run and return all the information I need, I'm
getting the following warning 5 times in a row while running it:
(node) warning: possible EventEmitter memory leak detected. 11 listeners
added. Use emitter.setMaxListeners() to increase limit.
Trace
at Socket.EventEmitter.addListener (events.js:160:15)
at Socket.Readable.on (_stream_readable.js:689:33)
at Socket.EventEmitter.once (events.js:179:8)
at Request.onResponse
(/home/max/Desktop/scraping/node_modules/request/request.js:625:25)
at ClientRequest.g (events.js:175:14)
at ClientRequest.EventEmitter.emit (events.js:95:17)
at HTTPParser.parserOnIncomingClient [as onIncoming] (http.js:1689:21)
at HTTPParser.parserOnHeadersComplete [as onHeadersComplete]
(http.js:120:23)
at Socket.socketOnData [as ondata] (http.js:1584:20)
at TCP.onread (net.js:525:27)
My instinct tells me I am probably getting this warning because I nested
a request within another request. I am not certain, though; what I do know
is that the warning shows up right when the nested request, located in one
of the getArtistInfo() function's cheerio.js .each() loops, is called
(see the code below).
Here is the code for my scraper:
// HTTP client: called below as request(pageURL, callback) to download a page.
var request = require('request');
// HTML loader: cheerio.load(body) returns a `$` function queried with CSS selectors.
var cheerio = require('cheerio');
// Site root (ends with "/"); relative links found on the pages are appended to it.
var url = 'http://www.primarytalent.com/';
/**
 * Downloads the roster page at `url` and starts a scrape of every artist
 * page it links to.
 *
 * Each link matched by '#rosterlists div li a' is turned into an absolute
 * URL by dropping the first character of its href (the leading "/") and
 * appending the rest to `url`, which is therefore expected to end with "/".
 * The absolute URL is logged and handed to getArtistInfo().
 *
 * @param {string} url - Site root to download and to resolve links against.
 * @returns {undefined} Nothing; the work happens in the request callback.
 * @throws Re-throws the request error from inside the callback: without the
 *         roster page there is nothing to crawl, so the failure is fatal.
 */
var getManyArtistsInfo = function(url){
  request(url, function(err, resp, body) {
    if (err)
      throw err;
    // Keep `$` local: an undeclared `$` is a global shared by every
    // in-flight callback, and an error in strict mode.
    var $ = cheerio.load(body);
    $('#rosterlists div li a').each(function(){
      // $(this) works whether `this` is the raw element or already wrapped.
      var href = $(this).attr('href');
      if (!href)
        return; // anchor without an href: nothing to follow
      var artistURL = url.concat(href.slice(1));
      console.log(artistURL);
      getArtistInfo(artistURL);
    });
  });
};
// Substrings that identify which social network a link belongs to. A link
// containing none of them is treated as the artist's own site.
var SOCIAL_NETWORKS = ['facebook', 'twitter', 'soundcloud', 'bandcamp', 'myspace'];

/**
 * Classifies a link taken from an artist page.
 *
 * @param {string} socialURL - The link's href.
 * @returns {string} The first entry of SOCIAL_NETWORKS found inside
 *          `socialURL`, or 'site' when none is found.
 */
var classifySocialURL = function(socialURL){
  for (var i = 0; i < SOCIAL_NETWORKS.length; i++) {
    if (socialURL.indexOf(SOCIAL_NETWORKS[i]) !== -1)
      return SOCIAL_NETWORKS[i];
  }
  return 'site';
};

/**
 * Downloads one agent page and logs the agent's name, email, phone and the
 * number of artists listed on the page.
 *
 * @param {string} agentURL - Absolute URL of the agent page.
 * @param {string} artistName - Artist whose page linked here; only used in
 *        the "<agent> reps <artist>" log line.
 * @returns {undefined}
 */
var getAgentInfo = function(agentURL, artistName){
  request(agentURL, function(err, resp, body) {
    if (err) {
      // One unreachable agent page should not abort the rest of the crawl.
      console.error('Could not fetch agent page ' + agentURL + ':', err);
      return;
    }
    var $ = cheerio.load(body);
    console.log("NOW SCRAPING AGENT'S PAGE")
    var agentName = $('#content #col3-1 #details li h1').text();
    console.log(agentName + ' reps ' + artistName);
    // Strip the "mailto:" scheme when present instead of blindly cutting
    // 7 characters, and tolerate a missing link.
    var mailto = $('#content #col3-1 #details li a').attr("href") || '';
    var agentEmail = mailto.replace(/^mailto:/, '');
    console.log(agentEmail);
    var agentPhone = $('#content #col3-1 #details li').last().text();
    console.log(agentPhone);
    var agentArtistList = [];
    $('#content #col3-1 #artists li a').each(function(){
      agentArtistList.push($(this).text());
    });
    console.log(agentName + ' represents ' + agentArtistList.length + ' artists!');
  });
};

/**
 * Downloads one artist page, logs what it finds and starts a scrape of
 * every agent page the artist links to.
 *
 * Logged, in order: the artist name, each link in the "#links" list, and
 * each agent URL (built by appending the agent href, minus its first
 * character, to the module-level `url`).
 *
 * @param {string} artistURL - Absolute URL of the artist page.
 * @param {function(?Error, Object=)} [onArtist] - Optional. Called once the
 *        artist page has been parsed (agent pages may still be loading) with
 *        `(null, { name, links, agentURLs })`, where `links` maps 'site' and
 *        each SOCIAL_NETWORKS entry to an array of URLs. Called with the
 *        error alone when the page could not be fetched.
 * @returns {undefined}
 */
var getArtistInfo = function(artistURL, onArtist){
  request(artistURL, function(err, resp, body) {
    if (err) {
      // Log and skip: throwing here would kill every other in-flight scrape.
      console.error('Could not fetch artist page ' + artistURL + ':', err);
      if (typeof onArtist === 'function')
        onArtist(err);
      return;
    }
    // Local on purpose: a shared global `$` would be overwritten by whichever
    // response arrives next.
    var $ = cheerio.load(body);
    console.log("NOW SCRAPING artist's PAGE")

    var links = { site: [] };
    SOCIAL_NETWORKS.forEach(function(network){
      links[network] = [];
    });
    var artist = { name: "", links: links, agentURLs: [] };

    $('#content #col3-1 h1').each(function(){
      // $(this) works whether `this` is the raw element or already wrapped.
      artist.name = $(this).text();
      console.log(artist.name);
    });

    $('#content #col3-1 #links li a').each(function(){
      var socialURL = $(this).attr('href');
      if (!socialURL)
        return; // anchor without an href
      links[classifySocialURL(socialURL)].push(socialURL);
      console.log(socialURL);
    });

    // get agentURL
    $('#content #col3-1 .contacts li a').each(function(){
      var agentPath = $(this).attr('href');
      if (!agentPath)
        return; // anchor without an href
      var agentURL = url + agentPath.slice(1);
      console.log("Agent url is : " + agentURL);
      artist.agentURLs.push(agentURL);
      getAgentInfo(agentURL, artist.name);
    });

    if (typeof onArtist === 'function')
      onArtist(null, artist);
  });
};
// Entry point: start the crawl from the site root defined above.
getManyArtistsInfo(url);
Did I make a spaghetti mess here?
How can I stop this EventEmitter memory leak issue from happening?

No comments:

Post a Comment