Elasticsearch : afficher tous les résultats en utilisant scroll dans Node.js

Question

J'essaie essentiellement d'afficher tous les enregistrements d'un type d'index. Si l'on utilise match_all dans la requête, Elasticsearch ne renvoie que 10 résultats par défaut. On peut récupérer tous les résultats en utilisant scroll. J'essaie d'utiliser l'API scroll, mais je n'arrive pas à la faire fonctionner : elle ne renvoie que 10 résultats. Voici mon code :

/**
 * Runs a `match_all` search on the `test` index / `records` type with a
 * 10-second scroll context and passes the hits of the first response to
 * `callback`.
 *
 * This is the snippet from the question, kept as asked: only the first page
 * of hits ever reaches `callback`.
 *
 * @param {*} searchData - Not used by this function.
 * @param {Function} callback - Called with `resp.hits.hits` of the first page.
 */
module.exports.searchAll = function (searchData, callback) {
  const firstSearch = {
    index: 'test',
    type: 'records',
    scroll: '10s',
    // search_type: 'scan' was tried here; according to the author it then
    // requires `size`, otherwise it shows 0 results.
    body: {
      query: {
        "match_all": {}
      }
    }
  };

  client.search(firstSearch, function (err, resp) {
    const nextPage = { scrollId: resp._scroll_id, scroll: '10s' };

    // `callback(...)` is invoked right here with the first page's hits; it is
    // its return value, not `callback` itself, that is handed to
    // `client.scroll` as the completion handler.
    client.scroll(nextPage, callback(resp.hits.hits));
  });
};

Quelqu'un peut-il aider s'il vous plaît?

Ceilingfish · Accepted Answer

Vous devez appeler client.scroll à plusieurs reprises jusqu'à ce qu'aucun autre enregistrement ne soit renvoyé. Il y a un bon exemple dans la documentation d'Elasticsearch. J'ai reproduit leur exemple de code ci-dessous, légèrement modifié pour répondre à votre question.

// Accumulates the hits of every page returned by the search and the
// follow-up scroll calls.
const allRecords = [];

// First we do a search, and specify a scroll timeout.
client.search({
  index: 'test',
  type: 'records',
  scroll: '10s',
  body: {
    query: {
      "match_all": {}
    }
  }
}, function getMoreUntilDone(error, response) {
  // Without this check a failed request would surface as a TypeError on
  // `response.hits` below instead of the real error.
  if (error) {
    console.error('scroll search failed', error);
    return;
  }

  // Collect all the records of this page.
  const pageHits = response.hits.hits;
  pageHits.forEach(function (hit) {
    allRecords.push(hit);
  });

  // Keep scrolling while this page brought records and the reported total
  // has not been reached. The empty-page check also ends the loop if
  // `hits.total` never equals the collected count.
  if (pageHits.length > 0 && response.hits.total !== allRecords.length) {
    // Now we can call scroll over and over, reusing this same handler.
    client.scroll({
      scrollId: response._scroll_id,
      scroll: '10s'
    }, getMoreUntilDone);
  } else {
    console.log('all done', allRecords);
  }
});

Visualize · Answer

Merci @Ceilingfish. Voici une version ES6 modifiée de ce qui précède, en utilisant await.

// NOTE: uses `await` at the top level, so this must run inside an async
// function or an ES module.

// Accumulates the hits of every page.
const allRecords = [];

// First we do a search, and specify a scroll timeout.
let { _scroll_id, hits } = await esclient.search({
  index: 'test',
  type: 'records',
  scroll: '10s',
  body: {
    query: {
      "match_all": {}
    },
    _source: false
  }
});

// Keep scrolling until a page comes back without hits.
while (hits && hits.hits.length) {
  // Append all new hits.
  allRecords.push(...hits.hits);

  console.log(`${allRecords.length} of ${hits.total}`);

  // Re-assign (rather than re-declare) the scroll id and hits for the next
  // iteration; the parentheses are required for a destructuring assignment.
  ({ _scroll_id, hits } = await esclient.scroll({
    scrollId: _scroll_id,
    scroll: '10s'
  }));
}

console.log(`Complete: ${allRecords.length} records retrieved`);

mahendiran chandrasekar · Answer

Voici ce que j'utilise, avec des Promises :

/**
 * Small helper that holds an elasticsearch client plus the index and type
 * to query, and can collect every hit of a query through the scroll API.
 *
 * Reads `esUrl` and `elasticsearch` from the enclosing scope.
 */
function EsHelper() {
  this.esUrl = esUrl;
  this.indexName = "myIndex";
  this.type = "myIndexType";
  this.elasticClient = new elasticsearch.Client({
    host: esUrl
  });
}

/**
 * Appends the hits of `response` to `allHits`, then keeps requesting the
 * next scroll page until the reported total has been collected or a page
 * comes back empty.
 *
 * @param {object} response - A search/scroll response with `_scroll_id`,
 *   `hits.hits` and `hits.total`.
 * @param {Array} allHits - Accumulator; mutated in place.
 * @returns {Promise<Array>} Resolves with `allHits`; rejects if a scroll
 *   request fails or a response has no `hits.hits`.
 */
EsHelper.prototype.scrollData = async function (response, allHits) {
  let page = response;

  for (;;) {
    const pageHits = page.hits.hits;
    for (const hit of pageHits) {
      allHits.push(hit);
    }

    // The empty-page check guarantees termination even if `hits.total`
    // never equals the number of collected hits.
    if (pageHits.length === 0 || page.hits.total === allHits.length) {
      return allHits;
    }

    page = await this.elasticClient.scroll({
      scroll_id: page._scroll_id,
      scroll: '10s',
    });
  }
};

/**
 * Runs `query` against the configured index/type with a 10-second scroll
 * context and resolves with every hit.
 *
 * @param {object} query - Request body passed as `body` to `search`.
 * @returns {Promise<Array>} All hits across all scroll pages.
 */
EsHelper.prototype.runSearchWithScroll = function (query) {
  const allHits = [];

  return this.elasticClient.search({
    index: this.indexName,
    type: this.type,
    scroll: '10s',
    body: query
  }).then((response) => this.scrollData(response, allHits));
};

Une meilleure façon?

raka · Answer

Node.js échouait lorsque Elasticsearch renvoyait plus de 10 000 résultats. Voici comment j'ai utilisé le défilement (scroll).

/**
 * Fetches every record of the `test` index / `records` type by running a
 * `match_all` search (page size 10000, 10-second scroll context) and then
 * scrolling (30-second context) until all hits are collected.
 *
 * Reads the client from `esclient` in the enclosing scope.
 *
 * @returns {Promise<{hits: {hits: Array}}>} Object whose `hits.hits` holds
 *   the hits of all pages, in the order they were returned.
 */
async function getResultsFromElastic() {
  const collected = [];
  const responseAll = { hits: { hits: collected } };

  const searchQuery = {
    index: 'test',
    type: 'records',
    body: {
      query: {
        "match_all": {}
      }
    },
    scroll: '10s',
    size: 10000,
  };

  let response = await esclient.search(searchQuery);

  for (;;) {
    const pageHits = response.hits.hits;
    for (const hit of pageHits) {
      collected.push(hit);
    }

    // Stop once the reported total is reached; the empty-page check
    // guarantees termination if that never happens.
    if (pageHits.length === 0 || response.hits.total === collected.length) {
      break;
    }

    // Get the next response if there are more to fetch.
    response = await esclient.scroll({
      scrollId: response._scroll_id,
      scroll: '30s'
    });
  }

  return responseAll;
}