Node-Elastical: Implement stats function

Recently, I had to find the size of an index in Elasticsearch. I could have just built my query using request like this:

var request = require('request');
// request invokes the callback with (error, response, body). The stats
// document is carried in `body` (a JSON string unless the `json` option is
// set), not on the response object itself.
request.get('http://localhost:9200/' + myIndex + '/_stats'
  , function (err, res, body) {
    if (err) { return console.error(err); }
    var stats = JSON.parse(body);
    console.log(stats._all.primaries.store.size_in_bytes);
});

Fast and easy. Since it’s not available yet in node-elastical, I thought it better to add this functionality so that other people can use it. Implementing the stats function wasn’t difficult. It just requires parsing the options to build the right URL and then making the request (with request) to Elasticsearch. I used the Indices Stats API documentation to create the function.

In file client.js

stats: function (options, callback) {
  var query = [],
  url = '',
  hasOptions;

  if (typeof options === 'function') {
    callback = options;
    options = {};
  }
  //Create a copy of options so we can modify it.
  options = util.merge(options || {});

  if (options.index) {
    url = '/' + encode(Array.isArray(options.index) ?
      options.index.join(',') : options.index);
    delete options.index;
    //Look for types only if there is an index
    if (options.types) {
      query.push(encode('types') + '=' + encode(
        Array.isArray(options.types) ?
        options.types.join(',') : options.types));
    }
    delete options.types;
  }

  url += '/_stats';

  util.each(options, function (value, name) {
    if (value === true || value === false) {
      value = value ? '1' : '0';
    }

    query.push(encode(name) + '=' + encode(value));
  });

  if (query.length) {
    url += '?' + query.join('&');
  }

  this._request(url, {
    method: 'GET'
  }, function (err, res) {
    if(err) { return callback(err, null, res), undefined; }
    callback(null, res);
  });
},
In file index.js
stats: function (options, callback) {
  if (typeof options === 'function') {
    callback = options;
    options = {};
  }
  this.client.stats(util.merge(options,{index: this.name}), callback);
},

Request: noproxy configuration

Like with npm and for the same reasons, it would be a great idea to have a noproxy configuration in request.
So here is the pull request!

// Bypass the proxy when the target hostname is listed in `noproxy`
// (a comma-separated list of hostnames). Entries are compared exactly:
// String.prototype.search would treat the hostname as a regular expression
// ('.' matches any character) and would also accept partial matches.
if (typeof self.noproxy === 'string') {
  var noProxyHosts = self.noproxy.split(',').map(function (host) {
    return host.trim().toLowerCase()
  })
  if (noProxyHosts.indexOf(self.uri.hostname) !== -1) {
    delete self.proxy
  }
}

Really simple. If the hostname is in the noproxy string, we delete the proxy parameter so that it won’t be used.

And here is the test which validates the modification:

/*
** Test noproxy configuration.
**
** We create a server and a proxy. Both use unprivileged ports so that the
** test can run without root privileges.
** Server listens on localhost:6767.
** Proxy listens on localhost:6768.
** The proxy redirects all requests to /proxy on the server.
** On the server, /proxy sends "proxy" .
** When server is directly requested, it answers with "noproxy" .
**
**
** So we perform 2 tests, both with proxy equal to the proxy address.
** -A test is performed with noproxy equal to "null". In this case,
** the server responds with "proxy" because the proxy is used.
** -In the other test, noproxy equal "localhost, example.com".
** Since localhost is part of noproxy, request is made directly
** to the server and proxy is ignored.
*/

var assert = require("assert")
  , http = require('http')
  , request = require('../main.js')
  , serverPort = 6767
  , proxyPort = 6768
  , serverUrl = 'http://localhost:' + serverPort
  , proxyUrl = 'http://localhost:' + proxyPort
  //We create a server and a proxy.
  , server = http.createServer(function(req, res){
      res.statusCode = 200
      if(req.url == '/proxy') {
        res.end('proxy')
      } else {
        res.end('noproxy')
      }
    })
  , proxy = http.createServer(function (req, res) {
      res.statusCode = 200
      // Whatever is requested, forward it to /proxy on the server.
      var url = serverUrl + '/proxy'
      var x = request(url)
      req.pipe(x)
      x.pipe(res)
    })
    ;

//Launch server and proxy
var initialize = function (cb) {
  server.listen(serverPort, 'localhost', function () {
    proxy.listen(proxyPort, 'localhost', cb)
  })
}

//Tests
initialize(function () {
  //Checking the route for server and proxy
  request.get(serverUrl + '/test', function (err, res, body) {
    // Fail on the transport error itself rather than on `res` being undefined.
    assert.ifError(err)
    assert.equal(res.statusCode, 200)
    request.get(serverUrl + '/proxy', function (err, res2, body) {
      assert.ifError(err)
      assert.equal(res2.statusCode, 200)
      request.get(proxyUrl + '/test', function (err, res3, body) {
        assert.ifError(err)
        assert.equal(res3.statusCode, 200)
        makeNoProxyTest(function () {
          makeProxyTest(function () {
            closeServer(server)
            closeServer(proxy)
          })
        })
      })
    })
  })
})

//Request with noproxy
var makeNoProxyTest = function (cb) {
  request ({
    url: serverUrl + '/test',
    proxy: proxyUrl,
    noproxy: 'localhost, example.com'
  }, function (err, res, body) {
    assert.ifError(err)
    assert.equal(body, 'noproxy')
    cb()
  })
}

//Request with proxy
var makeProxyTest = function (cb) {
  request ({
    url: serverUrl + '/test',
    proxy: proxyUrl,
    noproxy: 'null'
  }, function (err, res, body) {
    assert.ifError(err)
    assert.equal(body, 'proxy')
    cb()
  })
}

var closeServer = function (s) {
  s.close()
}

NPM: noproxy Configuration

For security reasons, most companies use a proxy to manage their internet connection. Coding behind a proxy is fine. Most software is able to deal with it. You just have to set it in the environment variables of your GNU/Linux OS or directly in the software.

But there is one problem. Because every request must go through the proxy, it’s impossible to request URLs located on the internal network. There is only one solution: disable the proxy, make the request, re-enable the proxy. If you have been using npm until now, that’s what you have to do.

In order to make this process easier, I’ve made a pull request which adds a noproxy configuration. When npm fetches a package from a given URL, the hostname is compared with the noproxy configuration. If there is a match, the request is made without the proxy; if not, the proxy is used.

The noproxy configuration looks for a variable named « noproxy » in npm configuration.
« noproxy » is a string containing hostnames. So, this list of hostnames will not ever go through a proxy.
Code concerned:

// Pick the proxy for `remote`, unless its hostname is listed in the
// "noproxy" configuration (a comma-separated list of hostnames).
// Hostnames are compared exactly: String.prototype.search would interpret
// the hostname as a regular expression and accept partial matches, and it
// would throw if "noproxy" were not a string.
var proxy = null
  , noproxy = npm.config.get("noproxy")
  , noProxyHosts = typeof noproxy === "string"
      ? noproxy.split(",").map(function (host) { return host.trim() })
      : []
if (noProxyHosts.indexOf(remote.hostname) === -1) {
  // Prefer "https-proxy" for https remotes and fall back to "proxy".
  if (remote.protocol !== "https:" ||
      !(proxy = npm.config.get("https-proxy"))) {
    proxy = npm.config.get("proxy")
  }
}

// Options handed to request for fetching `remote`. The configured CA is
// only supplied when the remote host is the registry host.
var opts = {
  url: remote,
  proxy: proxy,
  strictSSL: npm.config.get("strict-ssl"),
  ca: remote.host === regHost ? npm.config.get("ca") : undefined,
  headers: { "user-agent": npm.config.get("user-agent") }
}

var req = request(opts)

At the moment, I’m still waiting for Isaacs to add the pull request in npm.
I hope it will be added soon.

Update: Isaacs has pointed out that it’s good for the npm bit but some modifications are still needed in npm-registry-client and npmconf. So here are the new additions:

  • npmconf:
 , "noproxy" : process.env.NO_PROXY || process.env.no_proxy ||  "null"
 , "no-proxy" : ["null", String]
  • npm-registry-client:
  var p = this.conf.get('proxy')
  var sp = this.conf.get('https-proxy') || p
  var np = this.conf.get('noproxy')

  // "noproxy" is a comma-separated list of hostnames. Compare hostnames
  // exactly: String.prototype.search would interpret the hostname as a
  // regular expression and accept partial matches.
  var noProxyHosts = typeof np === 'string'
    ? np.split(',').map(function (host) { return host.trim() })
    : []

  if (noProxyHosts.indexOf(remote.hostname) === -1) {
    opts.proxy = remote.protocol === "https:" ? sp : p
  }

I hope everything is now in order so that noproxy configuration can be added to npm :).

Mocha: path to mocha.opts with -f

Mocha is a wonderful (:D) test framework which allows you to write really understandable tests in JavaScript. I’ve already introduced it in a previous article, so today I’ll focus on a pull request I’ve proposed.

Like other programs, you can pass arguments to mocha, for instance --reporter, which is used to specify how test results are displayed. By the way, you should try the nyan reporter once. With mocha, it’s also possible to set arguments in a file named « mocha.opts ». It’s an interesting feature, but the file must be stored in your test directory.

So the idea is to add an argument: -f, --fileconf <path>. The --fileconf option allows you to specify the configuration file that will be used (by default “/test/mocha.opts”), so you can store your configuration file anywhere you want. This modification is very useful if you run tests using the same configuration file in many directories. Without --fileconf, you have to copy your file into each directory, and even a small modification must be made in each copy. Using -f, you store the mocha.opts file in a single place, so that any modification of the file immediately impacts all tests.

About the code

Add the parameter with commander:

.option('-f,--config <path>','specify the path to configuration file')

Load the file using commander to parse the command line:

//-f, --config
// Prepend the arguments found in the options file to the command-line
// arguments. The file defaults to test/mocha.opts and can be overridden
// with -f/--config when the given path exists.
program.parse(process.argv);
var pathConf = 'test/mocha.opts';
if (program.config && exists(program.config)) {
  pathConf = program.config;
}
try {
  // An empty or whitespace-only file splits into [''], so drop empty tokens
  // to avoid injecting an empty argument into argv.
  var opts = fs.readFileSync(pathConf, 'utf8')
    .trim()
    .split(/\s+/)
    .filter(Boolean);

  process.argv = process.argv
    .slice(0, 2)
    .concat(opts.concat(process.argv.slice(2)));
} catch (err) {
  // ignore: the options file is optional, so a missing or unreadable file
  // simply leaves argv untouched.
}

In fact, this code is not available in mocha yet since « parsing twice might have some strange side-effects »…

Another way to augment argv without using commander:

//-f, --config

// Resolve the options file path straight from argv: start from the default
// and let the value following -f, then the value following --config,
// override it when that path exists.
var pathConf = 'test/mocha.opts';
var fIndex = process.argv.indexOf('-f');
var configIndex = process.argv.indexOf('--config');

[fIndex, configIndex].forEach(function (flagIndex) {
  if (flagIndex === -1) {
    return;
  }
  var candidate = process.argv[flagIndex + 1];
  if (exists(candidate)) {
    pathConf = candidate;
  }
});

And I’ll finish this article by quoting visionmedia:

Maybe it’s ok but that’s still pretty hacky.

Node-Elastical: Delete by query option

What is Elastical?

Elastical is a Node.js client library for the ElasticSearch REST API.

That’s it for the presentation.

Until a few weeks ago, it wasn’t possible to delete data by query without using a hack. The hack was the following:

// Workaround: pass an empty id and put the search in the `q` option.
client.delete('twitter', 'tweet', '', {q: 'user:Shay'} );

Here, we set id to an empty string and we use the options parameter to perform a search on user with value Shay. Not so practical at all.

Using curl to interact with ElasticSearch, it is possible to delete by query:

# Delete by query: send the query as the body of a DELETE to <index>/<type>/_query.
curl -XDELETE 'http://localhost:9200/twitter/tweet/_query' -d ' 
  { "term" : { "user" : "kimchy" } 
}
'

This handy way of deleting things in ElasticSearch was not possible with Elastical (unless you managed to use the hack above).

So I’ve added a new parameter, options.query, which allows us to perform such a query more easily.

// Delete by query: the term query is passed in the `query` option and the
// id argument is left as an empty string.
client.delete('twitter', 'tweet', '', {query:
  { "term" : { "user" : "kimchy" } } 
});

When using the query parameter, id and all other options except ignoreMissing will be ignored.

Let’s take a quick look at the code involved:

// Delete-by-query path: when params.query is set, send a DELETE request to
// /<index>/<type>/_query with the query as the JSON body.
if(params.query) {
  url = '/' + encode(this.name) + '/' + encode(type) + '/_query';

  this.client._request(url, {method: 'DELETE', json: params.query}
  , function (err, res) {
    if (err) {
      // With ignoreMissing set, a response reporting found === false is
      // handed to the callback as a success instead of an error.
      if (ignoreMissing && res && res.found === false) {
        return callback(null, res), undefined;
      } else {
        return callback(err, res), undefined;
      }
    }
    callback(null, res);
  });
// Otherwise fall through to the original (non-query) delete code.
} else {

It’s pretty simple. First, we need to build the url, then we’re requesting ElasticSearch on the given url, passing the query in the json parameter. If options.query doesn’t exist, then the original code is used.

As a conclusion, delete by query is now available in Elastical and you should use it!

 

ElasticSearch: Delete By Query API