'use strict';

/**
 * HTTP/HTTPS fetching helpers.
 *
 * Exports:
 *   - FetchStream: a Readable stream that performs the request, follows
 *     redirects, keeps cookies in a CookieJar and transparently unpacks
 *     gzip/deflate response bodies.
 *   - fetchUrl:    callback-style convenience wrapper around FetchStream that
 *     collects the whole body and (optionally) converts it to UTF-8.
 *   - CookieJar:   re-exported from ./cookiejar.
 */

var http = require('http');
var https = require('https');
var urllib = require('url');
var utillib = require('util');
var zlib = require('zlib');
var dns = require('dns');
var Stream = require('stream').Readable;
var CookieJar = require('./cookiejar').CookieJar;
var encodinglib = require('encoding');
var net = require('net');

// Older Node versions only offer the `new Buffer()` constructor
var USE_ALLOC = typeof Buffer.alloc === 'function';

exports.FetchStream = FetchStream;
exports.CookieJar = CookieJar;
exports.fetchUrl = fetchUrl;

/**
 * Creates a zero-length Buffer using whichever allocation API is available.
 *
 * @returns {Buffer} empty buffer
 */
function emptyBuffer() {
    return USE_ALLOC ? Buffer.alloc(0) : new Buffer(0);
}

/**
 * Schedules `fn` to run after the current call stack has unwound, so that
 * callers get a chance to attach event handlers first.
 *
 * @param {Function} fn function to run later
 */
function defer(fn) {
    if (typeof setImmediate !== 'undefined') {
        setImmediate(fn);
    } else {
        process.nextTick(fn);
    }
}

/**
 * Readable stream that fetches `url`.
 *
 * Emits 'meta' once the (final, post-redirect) response headers are known,
 * then the body as regular Readable data. Errors are emitted as 'error'.
 *
 * @constructor
 * @param {String} url URL to fetch
 * @param {Object} [options] request options; the object is normalized in
 *        place by normalizeOptions()
 */
function FetchStream(url, options) {
    Stream.call(this);

    this.url = url;
    this.options = options || {};
    this.userAgent = this.options.userAgent || 'FetchStream';
    this._redirect_count = 0;

    // Stream state is set up before anything can fail so that _read() always
    // finds a usable buffer, even on an instance that only reports an error
    this.responseBuffer = emptyBuffer();
    this.ended = false;
    this.readyToRead = 0;

    if (!this.url) {
        // Deferred for the same reason as runStream below: an 'error' emitted
        // synchronously from the constructor can never have a listener
        defer(this.emit.bind(this, 'error', new Error('url not defined')));
        return;
    }

    this.normalizeOptions();

    // prevent errors before 'error' handler is set by deferring actions
    defer(this.runStream.bind(this, url));
}
utillib.inherits(FetchStream, Stream);

/**
 * Readable implementation hook. Records how many bytes the consumer asked for
 * and hands over whatever is buffered; signals EOF once the response has
 * ended and the buffer is empty.
 *
 * @param {Number} size number of bytes requested
 */
FetchStream.prototype._read = function (size) {
    if (this.ended && this.responseBuffer.length === 0) {
        this.push(null);
        return;
    }
    this.readyToRead += size;
    this.drainBuffer();
};

/**
 * Pushes up to `readyToRead` buffered bytes to the consumer and keeps the
 * remainder in `responseBuffer`. Does nothing when the consumer has not asked
 * for data or when nothing is buffered.
 */
FetchStream.prototype.drainBuffer = function () {
    if (this.readyToRead === 0 || this.responseBuffer.length === 0) {
        return;
    }

    var chunk;
    var rest;

    if (this.responseBuffer.length > this.readyToRead) {
        // responseBuffer is only ever replaced, never written in place, so
        // views onto it are safe and avoid two extra copies per drain
        chunk = this.responseBuffer.slice(0, this.readyToRead);
        rest = this.responseBuffer.slice(this.readyToRead);
    } else {
        chunk = this.responseBuffer;
        rest = emptyBuffer();
    }

    this.responseBuffer = rest;
    this.readyToRead = 0;

    if (this.options.encoding) {
        this.push(chunk, this.options.encoding);
    } else {
        this.push(chunk);
    }
};

/**
 * Emits 'destroy' with the given argument. _runStream() listens for this
 * event to abort the request that is currently in flight.
 *
 * @param {*} [ex] value passed on to 'destroy' listeners
 */
FetchStream.prototype.destroy = function (ex) {
    this.emit('destroy', ex);
};

/**
 * Fills in defaults on `this.options` (cookie jar, redirect limit, lowercased
 * header names, default headers, response size limit, HTTP method, initial
 * cookies, rejectUnauthorized). Mutates `this.options` in place.
 */
FetchStream.prototype.normalizeOptions = function () {
    var opts = this.options;
    var i;

    // cookiejar
    this.cookieJar = opts.cookieJar || new CookieJar();

    // default redirects - 10
    // if redirects are disabled, then 0
    // Both spellings are honoured: the original code tested `disableRedirect`
    // in one branch and `disableRedirects` in the other, which left
    // maxRedirects at 10 even when `disableRedirects` was set
    if (opts.disableRedirects || opts.disableRedirect) {
        opts.maxRedirects = 0;
    } else if (typeof opts.maxRedirects !== 'number' && !(opts.maxRedirects instanceof Number)) {
        opts.maxRedirects = 10;
    }

    // normalize header keys
    // HTTP and HTTPS takes in key names in case insensitive but to find
    // an exact value from an object key name needs to be case sensitive
    // so we're just lowercasing all input keys
    opts.headers = opts.headers || {};

    var keys = Object.keys(opts.headers);
    var newheaders = {};
    for (i = keys.length - 1; i >= 0; i--) {
        newheaders[keys[i].toLowerCase().trim()] = opts.headers[keys[i]];
    }
    opts.headers = newheaders;

    if (!opts.headers['user-agent']) {
        opts.headers['user-agent'] = this.userAgent;
    }

    if (!opts.headers.pragma) {
        opts.headers.pragma = 'no-cache';
    }

    if (!opts.headers['cache-control']) {
        opts.headers['cache-control'] = 'no-cache';
    }

    if (!opts.disableGzip) {
        opts.headers['accept-encoding'] = 'gzip, deflate';
    } else {
        delete opts.headers['accept-encoding'];
    }

    // max length for the response,
    // if not set, default is Infinity
    if (!opts.maxResponseLength) {
        opts.maxResponseLength = Infinity;
    }

    // method:
    // defaults to GET, or when payload present to POST
    if (!opts.method) {
        opts.method = opts.payload || opts.payloadSize ? 'POST' : 'GET';
    }

    // set cookies
    // takes full cookie definition strings as params
    if (opts.cookies) {
        for (i = 0; i < opts.cookies.length; i++) {
            this.cookieJar.setCookie(opts.cookies[i], this.url);
        }
    }

    // rejectUnauthorized
    if (typeof opts.rejectUnauthorized === 'undefined') {
        opts.rejectUnauthorized = true;
    }
};

/**
 * Converts a URL string into the options object for `http(s).request` and
 * selects the matching transport module.
 *
 * @param {String} url URL to parse
 * @returns {Object} `{urloptions, transport}` where `transport` is the `http`
 *          or `https` module (`http` for any protocol other than 'https:')
 */
FetchStream.prototype.parseUrl = function (url) {
    var urlparts = urllib.parse(url, false, true);
    var isHttps = urlparts.protocol === 'https:';
    var transport = isHttps ? https : http;

    var urloptions = {
        host: urlparts.hostname || urlparts.host,
        port: urlparts.port || (isHttps ? 443 : 80),
        // default applies to the pathname itself; concatenating first would
        // turn a missing pathname into the string "null"
        path: (urlparts.pathname || '/') + (urlparts.search || ''),
        method: this.options.method,
        rejectUnauthorized: this.options.rejectUnauthorized
    };

    // protocol specific agent first, a generic `agent` option overrides it
    var agentKey = isHttps ? 'agentHttps' : 'agentHttp';
    if (agentKey in this.options) {
        urloptions.agent = this.options[agentKey];
    }
    if ('agent' in this.options) {
        urloptions.agent = this.options.agent;
    }

    // Every request works on its own copy of the headers. Per-request values
    // (Authorization, host, cookie, content-length) are written into this
    // object later and must not carry over to a redirect target
    var baseHeaders = this.options.headers || {};
    var headers = {};
    Object.keys(baseHeaders).forEach(function (key) {
        headers[key] = baseHeaders[key];
    });
    urloptions.headers = headers;

    if (urlparts.auth) {
        var buf = USE_ALLOC ? Buffer.from(urlparts.auth) : new Buffer(urlparts.auth);
        urloptions.headers.Authorization = 'Basic ' + buf.toString('base64');
    }

    return {
        urloptions: urloptions,
        transport: transport
    };
};

/**
 * Stores the encoding that drainBuffer() passes along to push().
 *
 * @param {String} encoding encoding name
 */
FetchStream.prototype.setEncoding = function (encoding) {
    this.options.encoding = encoding;
};

/**
 * Prepares a request for `url` (cookies, content-length, optional manual DNS
 * lookup) and hands it to _runStream(). Called for the initial URL and again
 * for every redirect.
 *
 * @param {String} url URL to request
 */
FetchStream.prototype.runStream = function (url) {
    var url_data = this.parseUrl(url);
    var urloptions = url_data.urloptions;
    var cookies = this.cookieJar.getCookies(url);

    if (cookies) {
        urloptions.headers.cookie = cookies;
    } else {
        delete urloptions.headers.cookie;
    }

    if (this.options.payload) {
        urloptions.headers['content-length'] = Buffer.byteLength(this.options.payload || '', 'utf-8');
    }

    if (this.options.payloadSize) {
        urloptions.headers['content-length'] = this.options.payloadSize;
    }

    // `asyncDnsLoookup` (sic) is the historical option name; the correctly
    // spelled variant is accepted as well
    if (!this.options.asyncDnsLoookup && !this.options.asyncDnsLookup) {
        this._runStream(url_data, url);
        return;
    }

    var dnsCallback = (function (err, addresses) {
        if (err) {
            this.emit('error', err);
            return;
        }

        // connect to the resolved address but keep the original name in the
        // Host header
        urloptions.headers.host = urloptions.hostname || urloptions.host;
        urloptions.hostname = addresses[0];
        urloptions.host = urloptions.headers.host + (urloptions.port ? ':' + urloptions.port : '');

        this._runStream(url_data, url);
    }).bind(this);

    if (net.isIP(urloptions.host)) {
        dnsCallback(null, [urloptions.host]);
    } else {
        dns.resolve4(urloptions.host, dnsCallback);
    }
};

/**
 * Performs the actual request, follows redirects, emits 'meta' and feeds the
 * (optionally decompressed) response body into `responseBuffer`.
 *
 * @param {Object} url_data `{urloptions, transport}` as built by parseUrl()
 * @param {String} url URL being requested (used for cookies and redirects)
 */
FetchStream.prototype._runStream = function (url_data, url) {
    var self = this;
    var destroyedByUser = false;
    var req;

    function abortRequest() {
        if (typeof req.abort === 'function') {
            req.abort();
        } else {
            req.destroy();
        }
    }

    function onDestroy() {
        destroyedByUser = true;
        abortRequest();
    }

    req = url_data.transport.request(url_data.urloptions, function (res) {
        var i;

        // catch new cookies before potential redirect
        if (Array.isArray(res.headers['set-cookie'])) {
            for (i = 0; i < res.headers['set-cookie'].length; i++) {
                self.cookieJar.setCookie(res.headers['set-cookie'][i], url);
            }
        }

        if ([301, 302, 303, 307, 308].indexOf(res.statusCode) >= 0) {
            if (!self.options.disableRedirects && self.options.maxRedirects > self._redirect_count && res.headers.location) {
                self._redirect_count++;
                // this request is finished; drop its listener so listeners do
                // not pile up on the stream with every redirect hop
                self.removeListener('destroy', onDestroy);
                req.destroy();
                self.runStream(urllib.resolve(url, res.headers.location));
                return;
            }
        }

        self.meta = {
            status: res.statusCode,
            responseHeaders: res.headers,
            finalUrl: url,
            redirectCount: self._redirect_count,
            cookieJar: self.cookieJar
        };

        var curlen = 0;

        // appends a body chunk, truncating at options.maxResponseLength
        function receive(chunk) {
            var maxlen;

            if (curlen + chunk.length > self.options.maxResponseLength) {
                maxlen = self.options.maxResponseLength - curlen;
            } else {
                maxlen = chunk.length;
            }

            if (maxlen <= 0) {
                return;
            }

            curlen += Math.min(maxlen, chunk.length);

            if (maxlen >= chunk.length) {
                if (self.responseBuffer.length === 0) {
                    self.responseBuffer = chunk;
                } else {
                    self.responseBuffer = Buffer.concat([self.responseBuffer, chunk]);
                }
            } else {
                self.responseBuffer = Buffer.concat([self.responseBuffer, chunk], self.responseBuffer.length + maxlen);
            }

            self.drainBuffer();
        }

        function fail(e) {
            self.ended = true;
            self.emit('error', e);
            self.drainBuffer();
        }

        function end() {
            self.ended = true;
            if (self.responseBuffer.length === 0) {
                self.push(null);
            }
        }

        function unpack(type, response) {
            var z = zlib['create' + type]();
            z.on('data', receive);
            z.on('error', fail);
            z.on('end', end);
            response.pipe(z);
        }

        self.emit('meta', self.meta);

        // Without a listener a response that breaks off mid-body is never
        // reported and the stream would wait for an 'end' that never comes.
        // An abort the consumer asked for via destroy() is not an error.
        res.on('error', function (e) {
            if (!destroyedByUser) {
                fail(e);
            }
        });

        if (res.headers['content-encoding']) {
            switch (res.headers['content-encoding'].toLowerCase().trim()) {
                case 'gzip':
                    return unpack('Gunzip', res);
                case 'deflate':
                    // NOTE(review): decodes "deflate" as a raw deflate stream;
                    // zlib-wrapped bodies would need createInflate - confirm
                    // which variant the targeted servers send
                    return unpack('InflateRaw', res);
            }
        }

        res.on('data', receive);
        res.on('end', end);
    });

    req.on('error', function (e) {
        self.emit('error', e);
    });

    if (this.options.timeout) {
        req.setTimeout(this.options.timeout, abortRequest);
    }

    this.on('destroy', onDestroy);

    if (this.options.payload) {
        req.end(this.options.payload);
    } else if (this.options.payloadStream) {
        this.options.payloadStream.pipe(req);
        this.options.payloadStream.resume();
    } else {
        req.end();
    }
};

/**
 * Fetches `url` and passes the complete response body to `callback`.
 *
 * Unless `options.disableDecoding` is set, a body whose charset is not UTF-8
 * is converted to UTF-8. The charset is taken from `options.overrideCharset`,
 * a <meta> tag (for text/html) or the Content-Type header, in that order.
 *
 * @param {String} url URL to fetch
 * @param {Object} [options] FetchStream options plus `overrideCharset`,
 *        `disableDecoding` and `outputEncoding`
 * @param {Function} callback called as `callback(error)` on failure or
 *        `callback(null, meta, body)` on success; `body` is a Buffer, or a
 *        string when `options.outputEncoding` is set
 */
function fetchUrl(url, options, callback) {
    if (!callback && typeof options === 'function') {
        callback = options;
        options = undefined;
    }
    options = options || {};

    var fetchstream = new FetchStream(url, options);
    var response_data;
    var chunks = [];
    var length = 0;
    var content_type;
    var callbackFired = false;

    fetchstream.on('meta', function (meta) {
        response_data = meta;
        content_type = _parseContentType(meta.responseHeaders['content-type']);
    });

    fetchstream.on('data', function (chunk) {
        if (chunk) {
            chunks.push(chunk);
            length += chunk.length;
        }
    });

    fetchstream.on('error', function (error) {
        if (error && error.code === 'HPE_INVALID_CONSTANT') {
            // skip invalid formatting errors
            return;
        }
        if (callbackFired) {
            return;
        }
        callbackFired = true;
        callback(error);
    });

    fetchstream.on('end', function () {
        if (callbackFired) {
            return;
        }
        callbackFired = true;

        var buffer = Buffer.concat(chunks, length);

        // 'meta' always precedes 'end' in _runStream; the fallback only keeps
        // this handler from throwing should that ever change
        content_type = content_type || {};

        if (content_type.mimeType === 'text/html') {
            content_type.charset = _findHTMLCharset(buffer) || content_type.charset;
        }

        content_type.charset = (options.overrideCharset || content_type.charset || 'utf-8').trim().toLowerCase();

        if (!options.disableDecoding && !content_type.charset.match(/^utf-?8$/i)) {
            buffer = encodinglib.convert(buffer, 'UTF-8', content_type.charset);
        }

        if (options.outputEncoding) {
            return callback(null, response_data, buffer.toString(options.outputEncoding));
        }
        return callback(null, response_data, buffer);
    });
}

/**
 * Splits a Content-Type header value into mime type and charset.
 *
 * @param {String} [str] header value, e.g. `text/html; charset="iso-8859-1"`
 * @returns {Object} `{}` for an empty value, otherwise
 *          `{mimeType, charset}`, both lowercased; charset defaults to 'utf-8'
 */
function _parseContentType(str) {
    if (!str) {
        return {};
    }

    var parts = str.split(';');
    var mimeType = parts.shift();
    var charset;
    var chparts;

    for (var i = 0, len = parts.length; i < len; i++) {
        chparts = parts[i].split('=');
        if (chparts.length > 1 && chparts[0].trim().toLowerCase() === 'charset') {
            // the value may be a quoted string (charset="utf-8"); the quotes
            // are not part of the charset name
            charset = chparts[1].trim().replace(/^["']|["']$/g, '');
        }
    }

    return {
        mimeType: (mimeType || '').trim().toLowerCase(),
        charset: (charset || 'UTF-8').trim().toLowerCase() // defaults to UTF-8
    };
}

/**
 * Looks for a charset declaration in <meta> tags of an HTML document.
 *
 * NOTE(review): in the reviewed source both `<meta ...>` regular expressions
 * were damaged and everything after the second `body.match(` was missing. The
 * two patterns and the tail of this function are a reconstruction (an
 * http-equiv content-type tag first, then a `<meta charset=...>` tag) -
 * confirm against the canonical file before merging.
 *
 * @param {Buffer} htmlbuffer raw HTML document
 * @returns {String|undefined} lowercased charset name, if one was found
 */
function _findHTMLCharset(htmlbuffer) {
    var body = htmlbuffer.toString('ascii');
    var input;
    var meta;
    var charset;

    if ((meta = body.match(/<meta\s+http-equiv=["']content-type["'][^>]*?>/i))) {
        input = meta[0];
    }

    if (input) {
        charset = input.match(/charset\s?=\s?([a-zA-Z\-0-9]*);?/);
        if (charset) {
            charset = (charset[1] || '').trim().toLowerCase();
        }
    }

    if (!charset && (meta = body.match(/<meta\s+charset=["']?([a-zA-Z\-0-9]*)["']?/i))) {
        charset = (meta[1] || '').trim().toLowerCase();
    }

    return charset;
}