501 lines
15 KiB
JavaScript
501 lines
15 KiB
JavaScript
'use strict';
|
|
|
|
var http = require('http');
|
|
var https = require('https');
|
|
var urllib = require('url');
|
|
var utillib = require('util');
|
|
var zlib = require('zlib');
|
|
var dns = require('dns');
|
|
var Stream = require('stream').Readable;
|
|
var CookieJar = require('./cookiejar').CookieJar;
|
|
var encodinglib = require('encoding');
|
|
var net = require('net');
|
|
|
|
var USE_ALLOC = typeof Buffer.alloc === 'function';
|
|
|
|
exports.FetchStream = FetchStream;
|
|
exports.CookieJar = CookieJar;
|
|
exports.fetchUrl = fetchUrl;
|
|
|
|
function FetchStream(url, options) {
|
|
Stream.call(this);
|
|
|
|
options = options || {};
|
|
|
|
this.url = url;
|
|
if (!this.url) {
|
|
return this.emit('error', new Error('url not defined'));
|
|
}
|
|
|
|
this.userAgent = options.userAgent || 'FetchStream';
|
|
|
|
this._redirect_count = 0;
|
|
|
|
this.options = options || {};
|
|
this.normalizeOptions();
|
|
|
|
// prevent errors before 'error' handler is set by defferring actions
|
|
if (typeof setImmediate !== 'undefined') {
|
|
setImmediate(this.runStream.bind(this, url));
|
|
} else {
|
|
process.nextTick(this.runStream.bind(this, url));
|
|
}
|
|
this.responseBuffer = USE_ALLOC ? Buffer.alloc(0, '', 'binary') : new Buffer(0, 'binary');
|
|
this.ended = false;
|
|
this.readyToRead = 0;
|
|
}
|
|
utillib.inherits(FetchStream, Stream);
|
|
|
|
FetchStream.prototype._read = function (size) {
|
|
if (this.ended && this.responseBuffer.length === 0) {
|
|
this.push(null);
|
|
return;
|
|
}
|
|
this.readyToRead += size;
|
|
this.drainBuffer();
|
|
};
|
|
|
|
FetchStream.prototype.drainBuffer = function () {
|
|
if (this.readyToRead === 0) {
|
|
return;
|
|
}
|
|
if (this.responseBuffer.length === 0) {
|
|
return;
|
|
}
|
|
var push;
|
|
var rest;
|
|
var restSize;
|
|
|
|
if (this.responseBuffer.length > this.readyToRead) {
|
|
push = USE_ALLOC ? Buffer.alloc(this.readyToRead, '', 'binary') : new Buffer(this.readyToRead, 'binary');
|
|
this.responseBuffer.copy(push, 0, 0, this.readyToRead);
|
|
restSize = this.responseBuffer.length - this.readyToRead;
|
|
rest = USE_ALLOC ? Buffer.alloc(restSize, '', 'binary') : new Buffer(restSize, 'binary');
|
|
this.responseBuffer.copy(rest, 0, this.readyToRead);
|
|
} else {
|
|
push = this.responseBuffer;
|
|
rest = USE_ALLOC ? Buffer.alloc(0, '', 'binary') : new Buffer(0, 'binary');
|
|
}
|
|
this.responseBuffer = rest;
|
|
this.readyToRead = 0;
|
|
if (this.options.encoding) {
|
|
this.push(push, this.options.encoding);
|
|
} else {
|
|
this.push(push);
|
|
}
|
|
};
|
|
|
|
FetchStream.prototype.destroy = function (ex) {
|
|
this.emit('destroy', ex);
|
|
};
|
|
|
|
FetchStream.prototype.normalizeOptions = function () {
|
|
|
|
// cookiejar
|
|
this.cookieJar = this.options.cookieJar || new CookieJar();
|
|
|
|
// default redirects - 10
|
|
// if disableRedirect is set, then 0
|
|
if (!this.options.disableRedirect && typeof this.options.maxRedirects !== 'number' &&
|
|
!(this.options.maxRedirects instanceof Number)) {
|
|
this.options.maxRedirects = 10;
|
|
} else if (this.options.disableRedirects) {
|
|
this.options.maxRedirects = 0;
|
|
}
|
|
|
|
// normalize header keys
|
|
// HTTP and HTTPS takes in key names in case insensitive but to find
|
|
// an exact value from an object key name needs to be case sensitive
|
|
// so we're just lowercasing all input keys
|
|
this.options.headers = this.options.headers || {};
|
|
|
|
var keys = Object.keys(this.options.headers);
|
|
var newheaders = {};
|
|
var i;
|
|
|
|
for (i = keys.length - 1; i >= 0; i--) {
|
|
newheaders[keys[i].toLowerCase().trim()] = this.options.headers[keys[i]];
|
|
}
|
|
|
|
this.options.headers = newheaders;
|
|
|
|
if (!this.options.headers['user-agent']) {
|
|
this.options.headers['user-agent'] = this.userAgent;
|
|
}
|
|
|
|
if (!this.options.headers.pragma) {
|
|
this.options.headers.pragma = 'no-cache';
|
|
}
|
|
|
|
if (!this.options.headers['cache-control']) {
|
|
this.options.headers['cache-control'] = 'no-cache';
|
|
}
|
|
|
|
if (!this.options.disableGzip) {
|
|
this.options.headers['accept-encoding'] = 'gzip, deflate';
|
|
} else {
|
|
delete this.options.headers['accept-encoding'];
|
|
}
|
|
|
|
// max length for the response,
|
|
// if not set, default is Infinity
|
|
if (!this.options.maxResponseLength) {
|
|
this.options.maxResponseLength = Infinity;
|
|
}
|
|
|
|
// method:
|
|
// defaults to GET, or when payload present to POST
|
|
if (!this.options.method) {
|
|
this.options.method = this.options.payload || this.options.payloadSize ? 'POST' : 'GET';
|
|
}
|
|
|
|
// set cookies
|
|
// takes full cookie definition strings as params
|
|
if (this.options.cookies) {
|
|
for (i = 0; i < this.options.cookies.length; i++) {
|
|
this.cookieJar.setCookie(this.options.cookies[i], this.url);
|
|
}
|
|
}
|
|
|
|
// rejectUnauthorized
|
|
if (typeof this.options.rejectUnauthorized === 'undefined') {
|
|
this.options.rejectUnauthorized = true;
|
|
}
|
|
};
|
|
|
|
FetchStream.prototype.parseUrl = function (url) {
|
|
var urlparts = urllib.parse(url, false, true),
|
|
transport,
|
|
urloptions = {
|
|
host: urlparts.hostname || urlparts.host,
|
|
port: urlparts.port,
|
|
path: urlparts.pathname + (urlparts.search || '') || '/',
|
|
method: this.options.method,
|
|
rejectUnauthorized: this.options.rejectUnauthorized
|
|
};
|
|
|
|
switch (urlparts.protocol) {
|
|
case 'https:':
|
|
transport = https;
|
|
break;
|
|
case 'http:':
|
|
default:
|
|
transport = http;
|
|
break;
|
|
}
|
|
|
|
if (transport === https) {
|
|
if('agentHttps' in this.options){
|
|
urloptions.agent = this.options.agentHttps;
|
|
}
|
|
if('agent' in this.options){
|
|
urloptions.agent = this.options.agent;
|
|
}
|
|
} else {
|
|
if('agentHttp' in this.options){
|
|
urloptions.agent = this.options.agentHttp;
|
|
}
|
|
if('agent' in this.options){
|
|
urloptions.agent = this.options.agent;
|
|
}
|
|
}
|
|
|
|
if (!urloptions.port) {
|
|
switch (urlparts.protocol) {
|
|
case 'https:':
|
|
urloptions.port = 443;
|
|
break;
|
|
case 'http:':
|
|
default:
|
|
urloptions.port = 80;
|
|
break;
|
|
}
|
|
}
|
|
|
|
urloptions.headers = this.options.headers || {};
|
|
|
|
if (urlparts.auth) {
|
|
var buf = USE_ALLOC ? Buffer.alloc(Buffer.byteLength(urlparts.auth), urlparts.auth) : new Buffer(urlparts.auth);
|
|
urloptions.headers.Authorization = 'Basic ' + buf.toString('base64');
|
|
}
|
|
|
|
return {
|
|
urloptions: urloptions,
|
|
transport: transport
|
|
};
|
|
};
|
|
|
|
FetchStream.prototype.setEncoding = function (encoding) {
|
|
this.options.encoding = encoding;
|
|
};
|
|
|
|
FetchStream.prototype.runStream = function (url) {
|
|
var url_data = this.parseUrl(url),
|
|
cookies = this.cookieJar.getCookies(url);
|
|
|
|
if (cookies) {
|
|
url_data.urloptions.headers.cookie = cookies;
|
|
} else {
|
|
delete url_data.urloptions.headers.cookie;
|
|
}
|
|
|
|
if (this.options.payload) {
|
|
url_data.urloptions.headers['content-length'] = Buffer.byteLength(this.options.payload || '', 'utf-8');
|
|
}
|
|
|
|
if (this.options.payloadSize) {
|
|
url_data.urloptions.headers['content-length'] = this.options.payloadSize;
|
|
}
|
|
|
|
if (this.options.asyncDnsLoookup) {
|
|
var dnsCallback = (function (err, addresses) {
|
|
if (err) {
|
|
this.emit('error', err);
|
|
return;
|
|
}
|
|
|
|
url_data.urloptions.headers.host = url_data.urloptions.hostname || url_data.urloptions.host;
|
|
url_data.urloptions.hostname = addresses[0];
|
|
url_data.urloptions.host = url_data.urloptions.headers.host + (url_data.urloptions.port ? ':' + url_data.urloptions.port : '');
|
|
|
|
this._runStream(url_data, url);
|
|
}).bind(this);
|
|
|
|
if (net.isIP(url_data.urloptions.host)) {
|
|
dnsCallback(null, [url_data.urloptions.host]);
|
|
} else {
|
|
dns.resolve4(url_data.urloptions.host, dnsCallback);
|
|
}
|
|
} else {
|
|
this._runStream(url_data, url);
|
|
}
|
|
};
|
|
|
|
FetchStream.prototype._runStream = function (url_data, url) {
|
|
|
|
var req = url_data.transport.request(url_data.urloptions, (function (res) {
|
|
|
|
// catch new cookies before potential redirect
|
|
if (Array.isArray(res.headers['set-cookie'])) {
|
|
for (var i = 0; i < res.headers['set-cookie'].length; i++) {
|
|
this.cookieJar.setCookie(res.headers['set-cookie'][i], url);
|
|
}
|
|
}
|
|
|
|
if ([301, 302, 303, 307, 308].indexOf(res.statusCode) >= 0) {
|
|
if (!this.options.disableRedirects && this.options.maxRedirects > this._redirect_count && res.headers.location) {
|
|
this._redirect_count++;
|
|
req.destroy();
|
|
this.runStream(urllib.resolve(url, res.headers.location));
|
|
return;
|
|
}
|
|
}
|
|
|
|
this.meta = {
|
|
status: res.statusCode,
|
|
responseHeaders: res.headers,
|
|
finalUrl: url,
|
|
redirectCount: this._redirect_count,
|
|
cookieJar: this.cookieJar
|
|
};
|
|
|
|
var curlen = 0,
|
|
maxlen,
|
|
|
|
receive = (function (chunk) {
|
|
if (curlen + chunk.length > this.options.maxResponseLength) {
|
|
maxlen = this.options.maxResponseLength - curlen;
|
|
} else {
|
|
maxlen = chunk.length;
|
|
}
|
|
|
|
if (maxlen <= 0) {
|
|
return;
|
|
}
|
|
|
|
curlen += Math.min(maxlen, chunk.length);
|
|
if (maxlen >= chunk.length) {
|
|
if (this.responseBuffer.length === 0) {
|
|
this.responseBuffer = chunk;
|
|
} else {
|
|
this.responseBuffer = Buffer.concat([this.responseBuffer, chunk]);
|
|
}
|
|
} else {
|
|
this.responseBuffer = Buffer.concat([this.responseBuffer, chunk], this.responseBuffer.length + maxlen);
|
|
}
|
|
this.drainBuffer();
|
|
}).bind(this),
|
|
|
|
error = (function (e) {
|
|
this.ended = true;
|
|
this.emit('error', e);
|
|
this.drainBuffer();
|
|
}).bind(this),
|
|
|
|
end = (function () {
|
|
this.ended = true;
|
|
if (this.responseBuffer.length === 0) {
|
|
this.push(null);
|
|
}
|
|
}).bind(this),
|
|
|
|
unpack = (function (type, res) {
|
|
var z = zlib['create' + type]();
|
|
z.on('data', receive);
|
|
z.on('error', error);
|
|
z.on('end', end);
|
|
res.pipe(z);
|
|
}).bind(this);
|
|
|
|
this.emit('meta', this.meta);
|
|
|
|
if (res.headers['content-encoding']) {
|
|
switch (res.headers['content-encoding'].toLowerCase().trim()) {
|
|
case 'gzip':
|
|
return unpack('Gunzip', res);
|
|
case 'deflate':
|
|
return unpack('InflateRaw', res);
|
|
}
|
|
}
|
|
|
|
res.on('data', receive);
|
|
res.on('end', end);
|
|
|
|
}).bind(this));
|
|
|
|
req.on('error', (function (e) {
|
|
this.emit('error', e);
|
|
}).bind(this));
|
|
|
|
if (this.options.timeout) {
|
|
req.setTimeout(this.options.timeout, req.abort.bind(req));
|
|
}
|
|
this.on('destroy', req.abort.bind(req));
|
|
|
|
if (this.options.payload) {
|
|
req.end(this.options.payload);
|
|
} else if (this.options.payloadStream) {
|
|
this.options.payloadStream.pipe(req);
|
|
this.options.payloadStream.resume();
|
|
} else {
|
|
req.end();
|
|
}
|
|
};
|
|
|
|
function fetchUrl(url, options, callback) {
|
|
if (!callback && typeof options === 'function') {
|
|
callback = options;
|
|
options = undefined;
|
|
}
|
|
options = options || {};
|
|
|
|
var fetchstream = new FetchStream(url, options),
|
|
response_data, chunks = [],
|
|
length = 0,
|
|
curpos = 0,
|
|
buffer,
|
|
content_type,
|
|
callbackFired = false;
|
|
|
|
fetchstream.on('meta', function (meta) {
|
|
response_data = meta;
|
|
content_type = _parseContentType(meta.responseHeaders['content-type']);
|
|
});
|
|
|
|
fetchstream.on('data', function (chunk) {
|
|
if (chunk) {
|
|
chunks.push(chunk);
|
|
length += chunk.length;
|
|
}
|
|
});
|
|
|
|
fetchstream.on('error', function (error) {
|
|
if (error && error.code === 'HPE_INVALID_CONSTANT') {
|
|
// skip invalid formatting errors
|
|
return;
|
|
}
|
|
if (callbackFired) {
|
|
return;
|
|
}
|
|
callbackFired = true;
|
|
callback(error);
|
|
});
|
|
|
|
fetchstream.on('end', function () {
|
|
if (callbackFired) {
|
|
return;
|
|
}
|
|
callbackFired = true;
|
|
|
|
buffer = USE_ALLOC ? Buffer.alloc(length) : new Buffer(length);
|
|
for (var i = 0, len = chunks.length; i < len; i++) {
|
|
chunks[i].copy(buffer, curpos);
|
|
curpos += chunks[i].length;
|
|
}
|
|
|
|
if (content_type.mimeType === 'text/html') {
|
|
content_type.charset = _findHTMLCharset(buffer) || content_type.charset;
|
|
}
|
|
|
|
content_type.charset = (options.overrideCharset || content_type.charset || 'utf-8').trim().toLowerCase();
|
|
|
|
|
|
if (!options.disableDecoding && !content_type.charset.match(/^utf-?8$/i)) {
|
|
buffer = encodinglib.convert(buffer, 'UTF-8', content_type.charset);
|
|
}
|
|
|
|
if (options.outputEncoding) {
|
|
return callback(null, response_data, buffer.toString(options.outputEncoding));
|
|
} else {
|
|
return callback(null, response_data, buffer);
|
|
}
|
|
|
|
});
|
|
}
|
|
|
|
function _parseContentType(str) {
|
|
if (!str) {
|
|
return {};
|
|
}
|
|
var parts = str.split(';'),
|
|
mimeType = parts.shift(),
|
|
charset, chparts;
|
|
|
|
for (var i = 0, len = parts.length; i < len; i++) {
|
|
chparts = parts[i].split('=');
|
|
if (chparts.length > 1) {
|
|
if (chparts[0].trim().toLowerCase() === 'charset') {
|
|
charset = chparts[1];
|
|
}
|
|
}
|
|
}
|
|
|
|
return {
|
|
mimeType: (mimeType || '').trim().toLowerCase(),
|
|
charset: (charset || 'UTF-8').trim().toLowerCase() // defaults to UTF-8
|
|
};
|
|
}
|
|
|
|
function _findHTMLCharset(htmlbuffer) {
|
|
|
|
var body = htmlbuffer.toString('ascii'),
|
|
input, meta, charset;
|
|
|
|
if ((meta = body.match(/<meta\s+http-equiv=["']content-type["'][^>]*?>/i))) {
|
|
input = meta[0];
|
|
}
|
|
|
|
if (input) {
|
|
charset = input.match(/charset\s?=\s?([a-zA-Z\-0-9]*);?/);
|
|
if (charset) {
|
|
charset = (charset[1] || '').trim().toLowerCase();
|
|
}
|
|
}
|
|
|
|
if (!charset && (meta = body.match(/<meta\s+charset=["'](.*?)["']/i))) {
|
|
charset = (meta[1] || '').trim().toLowerCase();
|
|
}
|
|
|
|
return charset;
|
|
}
|