From 95ba06b3c07629ad36a09d9cf1c49e05ebe90957 Mon Sep 17 00:00:00 2001 From: "Marcos G. Zimmermann" Date: Fri, 16 Aug 2024 11:35:44 -0300 Subject: [PATCH 1/4] feat!: update arguments of the lazy import --- CHANGELOG.md | 3 + Gemfile.lock | 2 +- gemfiles/Gemfile.elasticsearch-1.x.lock | 2 +- gemfiles/Gemfile.elasticsearch-2.x.lock | 2 +- gemfiles/Gemfile.elasticsearch-5.x.lock | 2 +- gemfiles/Gemfile.elasticsearch-6.x.lock | 2 +- gemfiles/Gemfile.elasticsearch-7.x.lock | 2 +- gemfiles/Gemfile.elasticsearch-8.x.lock | 2 +- gemfiles/Gemfile.opensearch-1.x.lock | 2 +- gemfiles/Gemfile.opensearch-2.x.lock | 2 +- lib/esse/cli/index.rb | 17 +++--- lib/esse/index/documents.rb | 6 +- lib/esse/version.rb | 2 +- spec/esse/cli/index/import_spec.rb | 59 ++++++++++++------- .../repository_documents_import.rb | 14 ++--- 15 files changed, 68 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74d5412..7f7bab1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 0.4.0 - 2024-08-16 +* Rename lazy_update_document_attributes to update_lazy_attributes +* Rename eager_include_document_attributes to eager_load_lazy_attributes ## 0.3.6 - 2024-08-07 * Esse::LazyDocumentHeader#to_doc return `Esse::LazyDocumentHeader::Document` instance to properly separate context metadata from document source * Add `.collection_class` method to the `Esse::Repository` class to let external plugins and extensions to access it instead of read @collection_proc variable diff --git a/Gemfile.lock b/Gemfile.lock index 33928ee..4120646 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . 
specs: - esse (0.3.6) + esse (0.4.0.rc1) multi_json thor (>= 0.19) diff --git a/gemfiles/Gemfile.elasticsearch-1.x.lock b/gemfiles/Gemfile.elasticsearch-1.x.lock index 382ab40..24f54b7 100644 --- a/gemfiles/Gemfile.elasticsearch-1.x.lock +++ b/gemfiles/Gemfile.elasticsearch-1.x.lock @@ -1,7 +1,7 @@ PATH remote: .. specs: - esse (0.3.6) + esse (0.4.0.rc1) multi_json thor (>= 0.19) diff --git a/gemfiles/Gemfile.elasticsearch-2.x.lock b/gemfiles/Gemfile.elasticsearch-2.x.lock index 6cdb044..6170440 100644 --- a/gemfiles/Gemfile.elasticsearch-2.x.lock +++ b/gemfiles/Gemfile.elasticsearch-2.x.lock @@ -1,7 +1,7 @@ PATH remote: .. specs: - esse (0.3.6) + esse (0.4.0.rc1) multi_json thor (>= 0.19) diff --git a/gemfiles/Gemfile.elasticsearch-5.x.lock b/gemfiles/Gemfile.elasticsearch-5.x.lock index 14d0495..418a138 100644 --- a/gemfiles/Gemfile.elasticsearch-5.x.lock +++ b/gemfiles/Gemfile.elasticsearch-5.x.lock @@ -1,7 +1,7 @@ PATH remote: .. specs: - esse (0.3.6) + esse (0.4.0.rc1) multi_json thor (>= 0.19) diff --git a/gemfiles/Gemfile.elasticsearch-6.x.lock b/gemfiles/Gemfile.elasticsearch-6.x.lock index c4cd2c3..242087c 100644 --- a/gemfiles/Gemfile.elasticsearch-6.x.lock +++ b/gemfiles/Gemfile.elasticsearch-6.x.lock @@ -1,7 +1,7 @@ PATH remote: .. specs: - esse (0.3.6) + esse (0.4.0.rc1) multi_json thor (>= 0.19) diff --git a/gemfiles/Gemfile.elasticsearch-7.x.lock b/gemfiles/Gemfile.elasticsearch-7.x.lock index 388bc9e..131a12f 100644 --- a/gemfiles/Gemfile.elasticsearch-7.x.lock +++ b/gemfiles/Gemfile.elasticsearch-7.x.lock @@ -1,7 +1,7 @@ PATH remote: .. specs: - esse (0.3.6) + esse (0.4.0.rc1) multi_json thor (>= 0.19) diff --git a/gemfiles/Gemfile.elasticsearch-8.x.lock b/gemfiles/Gemfile.elasticsearch-8.x.lock index 31e84f7..bffe836 100644 --- a/gemfiles/Gemfile.elasticsearch-8.x.lock +++ b/gemfiles/Gemfile.elasticsearch-8.x.lock @@ -1,7 +1,7 @@ PATH remote: .. 
specs: - esse (0.3.6) + esse (0.4.0.rc1) multi_json thor (>= 0.19) diff --git a/gemfiles/Gemfile.opensearch-1.x.lock b/gemfiles/Gemfile.opensearch-1.x.lock index ef7e822..99463dd 100644 --- a/gemfiles/Gemfile.opensearch-1.x.lock +++ b/gemfiles/Gemfile.opensearch-1.x.lock @@ -1,7 +1,7 @@ PATH remote: .. specs: - esse (0.3.6) + esse (0.4.0.rc1) multi_json thor (>= 0.19) diff --git a/gemfiles/Gemfile.opensearch-2.x.lock b/gemfiles/Gemfile.opensearch-2.x.lock index 913c057..438ab0f 100644 --- a/gemfiles/Gemfile.opensearch-2.x.lock +++ b/gemfiles/Gemfile.opensearch-2.x.lock @@ -1,7 +1,7 @@ PATH remote: .. specs: - esse (0.3.6) + esse (0.4.0.rc1) multi_json thor (>= 0.19) diff --git a/lib/esse/cli/index.rb b/lib/esse/cli/index.rb index c39abbe..4b0feda 100644 --- a/lib/esse/cli/index.rb +++ b/lib/esse/cli/index.rb @@ -97,18 +97,17 @@ def open(*index_classes) option :suffix, type: :string, default: nil, aliases: '-s', desc: 'Suffix to append to index name' option :context, type: :hash, default: {}, required: true, desc: 'List of options to pass to the index class' option :repo, type: :string, default: nil, alias: '-r', desc: 'Repository to use for import' - option :eager_include_document_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to include to the bulk index request. Or pass `true` to include all lazy attributes' - option :lazy_update_document_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to bulk update after the bulk index request Or pass `true` to include all lazy attributes' + option :preload_lazy_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to preload using search API before the bulk import. Or pass `true` to preload all lazy attributes' + option :eager_load_lazy_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to include to the bulk index request. 
Or pass `true` to include all lazy attributes' + option :update_lazy_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to bulk update after the bulk index request Or pass `true` to include all lazy attributes' + def import(*index_classes) require_relative 'index/import' opts = HashUtils.deep_transform_keys(options.to_h, &:to_sym) - opts.delete(:lazy_update_document_attributes) if opts[:lazy_update_document_attributes] == 'false' - opts.delete(:eager_include_document_attributes) if opts[:eager_include_document_attributes] == 'false' - if (val = opts[:eager_include_document_attributes]) - opts[:eager_include_document_attributes] = (val == 'true') ? true : val.split(',') - end - if (val = opts[:lazy_update_document_attributes]) - opts[:lazy_update_document_attributes] = (val == 'true') ? true : val.split(',') + %i[preload_lazy_attributes eager_load_lazy_attributes update_lazy_attributes].each do |key| + if (val = opts.delete(key)) && val != 'false' + opts[key] = (val == 'true') ? true : val.split(',') + end end Import.new(indices: index_classes, **opts).run end diff --git a/lib/esse/index/documents.rb b/lib/esse/index/documents.rb index 74f365c..ae3725d 100644 --- a/lib/esse/index/documents.rb +++ b/lib/esse/index/documents.rb @@ -199,14 +199,14 @@ def bulk(create: nil, delete: nil, index: nil, update: nil, type: nil, suffix: n # @option [Hash] :context The collection context. This value will be passed as argument to the collection # May be SQL condition or any other filter you have defined on the collection. # @return [Numeric] The number of documents imported - def import(*repo_types, context: {}, eager_include_document_attributes: false, lazy_update_document_attributes: false, suffix: nil, **options) + def import(*repo_types, context: {}, eager_load_lazy_attributes: false, update_lazy_attributes: false, preload_lazy_attributes: false, suffix: nil, **options) repo_types = repo_hash.keys if repo_types.empty? 
count = 0 repo_hash.slice(*repo_types).each do |repo_name, repo| doc_attrs = {eager: [], lazy: []} - doc_attrs[:eager] = repo.lazy_document_attribute_names(eager_include_document_attributes) - doc_attrs[:lazy] = repo.lazy_document_attribute_names(lazy_update_document_attributes) + doc_attrs[:eager] = repo.lazy_document_attribute_names(eager_load_lazy_attributes) + doc_attrs[:lazy] = repo.lazy_document_attribute_names(update_lazy_attributes) doc_attrs[:lazy] -= doc_attrs[:eager] context ||= {} diff --git a/lib/esse/version.rb b/lib/esse/version.rb index b80db78..b4b4bf7 100644 --- a/lib/esse/version.rb +++ b/lib/esse/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Esse - VERSION = '0.3.6' + VERSION = '0.4.0.rc1' end diff --git a/spec/esse/cli/index/import_spec.rb b/spec/esse/cli/index/import_spec.rb index 6db585f..a4ef42d 100644 --- a/spec/esse/cli/index/import_spec.rb +++ b/spec/esse/cli/index/import_spec.rb @@ -58,44 +58,59 @@ cli_exec(%w[index import CountiesIndex CitiesIndex]) end - it 'allows --eager-include-document-attributes as a comma separated list' do - expect(CountiesIndex).to receive(:import).with(eager_include_document_attributes: %w[foo bar], context: {}).and_return(true) - cli_exec(%w[index import CountiesIndex --eager-include-document-attributes=foo,bar]) + it 'allows --eager-load-lazy-attributes as a comma separated list' do + expect(CountiesIndex).to receive(:import).with(eager_load_lazy_attributes: %w[foo bar], context: {}).and_return(true) + cli_exec(%w[index import CountiesIndex --eager-load-lazy-attributes=foo,bar]) end - it 'allows --lazy-update-document-attributes as a single value' do - expect(CountiesIndex).to receive(:import).with(lazy_update_document_attributes: %w[foo], context: {}).and_return(true) - cli_exec(%w[index import CountiesIndex --lazy-update-document-attributes=foo]) + it 'allows --update-lazy-attributes as a single value' do + expect(CountiesIndex).to receive(:import).with(update_lazy_attributes: %w[foo], 
context: {}).and_return(true) + cli_exec(%w[index import CountiesIndex --update-lazy-attributes=foo]) end - it 'allows --lazy-update-document-attributes as true' do - expect(CountiesIndex).to receive(:import).with(lazy_update_document_attributes: true, context: {}).and_return(true) - cli_exec(%w[index import CountiesIndex --lazy-update-document-attributes=true]) + it 'allows --update-lazy-attributes as true' do + expect(CountiesIndex).to receive(:import).with(update_lazy_attributes: true, context: {}).and_return(true) + cli_exec(%w[index import CountiesIndex --update-lazy-attributes=true]) end - it 'allows --lazy-update-document-attributes as false' do + it 'allows --update-lazy-attributes as false' do expect(CountiesIndex).to receive(:import).with(context: {}).and_return(true) - cli_exec(%w[index import CountiesIndex --lazy-update-document-attributes=false]) + cli_exec(%w[index import CountiesIndex --update-lazy-attributes=false]) end - it 'allows --lazy-update-document-attributes as a comma separated list' do - expect(CountiesIndex).to receive(:import).with(lazy_update_document_attributes: %w[foo bar], context: {}).and_return(true) - cli_exec(%w[index import CountiesIndex --lazy-update-document-attributes=foo,bar]) + it 'allows --update-lazy-attributes as a comma separated list' do + expect(CountiesIndex).to receive(:import).with(update_lazy_attributes: %w[foo bar], context: {}).and_return(true) + cli_exec(%w[index import CountiesIndex --update-lazy-attributes=foo,bar]) end - it 'allows --lazy-update-document-attributes as a single value' do - expect(CountiesIndex).to receive(:import).with(lazy_update_document_attributes: %w[foo], context: {}).and_return(true) - cli_exec(%w[index import CountiesIndex --lazy-update-document-attributes=foo]) + it 'allows --update-lazy-attributes as a single value' do + expect(CountiesIndex).to receive(:import).with(update_lazy_attributes: %w[foo], context: {}).and_return(true) + cli_exec(%w[index import CountiesIndex 
--update-lazy-attributes=foo]) end - it 'allows --lazy-update-document-attributes as true' do - expect(CountiesIndex).to receive(:import).with(lazy_update_document_attributes: true, context: {}).and_return(true) - cli_exec(%w[index import CountiesIndex --lazy-update-document-attributes=true]) + it 'allows --update-lazy-attributes as true' do + expect(CountiesIndex).to receive(:import).with(update_lazy_attributes: true, context: {}).and_return(true) + cli_exec(%w[index import CountiesIndex --update-lazy-attributes=true]) end - it 'allows --lazy-update-document-attributes as false' do + it 'allows --update-lazy-attributes as false' do expect(CountiesIndex).to receive(:import).with(context: {}).and_return(true) - cli_exec(%w[index import CountiesIndex --lazy-update-document-attributes=false]) + cli_exec(%w[index import CountiesIndex --update-lazy-attributes=false]) + end + + it 'allows --preload-lazy-attributes as a comma separated list' do + expect(CountiesIndex).to receive(:import).with(preload_lazy_attributes: %w[foo bar], context: {}).and_return(true) + cli_exec(%w[index import CountiesIndex --preload-lazy-attributes=foo,bar]) + end + + it 'allows --preload-lazy-attributes as true' do + expect(CountiesIndex).to receive(:import).with(preload_lazy_attributes: true, context: {}).and_return(true) + cli_exec(%w[index import CountiesIndex --preload-lazy-attributes=true]) + end + + it 'allows --preload-lazy-attributes as false' do + expect(CountiesIndex).to receive(:import).with(context: {}).and_return(true) + cli_exec(%w[index import CountiesIndex --preload-lazy-attributes=false]) end end end diff --git a/spec/support/shared_examples/repository_documents_import.rb b/spec/support/shared_examples/repository_documents_import.rb index 5139602..955041b 100644 --- a/spec/support/shared_examples/repository_documents_import.rb +++ b/spec/support/shared_examples/repository_documents_import.rb @@ -63,14 +63,14 @@ end end - context 'when the lazy_update_document_attributes is set' 
do + context 'when the update_lazy_attributes is set' do it 'indexes the data and bulk updates all the lazy document attributes' do es_client do |client, _conf, cluster| GeosIndex.create_index(alias: true) resp = nil expect { - resp = GeosIndex::County.import(lazy_update_document_attributes: true) + resp = GeosIndex::County.import(update_lazy_attributes: true) }.not_to raise_error expect(resp).to eq(total_counties) @@ -89,7 +89,7 @@ resp = nil expect { - resp = GeosIndex::County.import(lazy_update_document_attributes: %i[country]) + resp = GeosIndex::County.import(update_lazy_attributes: %i[country]) }.not_to raise_error expect(resp).to eq(total_counties) @@ -103,14 +103,14 @@ end end - context 'when the eager_include_document_attributes is set' do + context 'when the eager_load_lazy_attributes is set' do it 'indexes the data and bulk updates all the eager document attributes' do es_client do |client, _conf, cluster| GeosIndex.create_index(alias: true) resp = nil expect { - resp = GeosIndex::County.import(eager_include_document_attributes: true) + resp = GeosIndex::County.import(eager_load_lazy_attributes: true) }.not_to raise_error expect(resp).to eq(total_counties) @@ -129,7 +129,7 @@ resp = nil expect { - resp = GeosIndex::County.import(eager_include_document_attributes: %i[country]) + resp = GeosIndex::County.import(eager_load_lazy_attributes: %i[country]) }.not_to raise_error expect(resp).to eq(total_counties) @@ -174,7 +174,7 @@ resp = nil expect { - resp = StoriesIndex::Story.import(lazy_update_document_attributes: %i[tags]) + resp = StoriesIndex::Story.import(update_lazy_attributes: %i[tags]) }.not_to raise_error expect(resp).to eq(stories.size) From 68038042d9d6274628c030e881a8d086042bd157 Mon Sep 17 00:00:00 2001 From: "Marcos G. 
Zimmermann" Date: Fri, 16 Aug 2024 18:33:10 -0300 Subject: [PATCH 2/4] feat: preload_lazy_attributes on import fetch the existing value from doc search before index --- lib/esse/document.rb | 2 - lib/esse/index/documents.rb | 100 ++++++++++++++---- .../repository_documents_import.rb | 54 ++++++++++ 3 files changed, 133 insertions(+), 23 deletions(-) diff --git a/lib/esse/document.rb b/lib/esse/document.rb index 841d907..a47f9f6 100644 --- a/lib/esse/document.rb +++ b/lib/esse/document.rb @@ -115,8 +115,6 @@ def mutate(key) instance_variable_set(:@__mutated_source__, nil) end - protected - def mutated_source return source unless @__mutations__ diff --git a/lib/esse/index/documents.rb b/lib/esse/index/documents.rb index ae3725d..c64167a 100644 --- a/lib/esse/index/documents.rb +++ b/lib/esse/index/documents.rb @@ -114,7 +114,7 @@ def delete(doc = nil, suffix: nil, **options) def update(doc = nil, suffix: nil, **options) if document?(doc) options[:id] = doc.id - options[:body] = { doc: doc.source } + options[:body] = { doc: doc.mutated_source } options[:type] = doc.type if doc.type? options[:routing] = doc.routing if doc.routing? end @@ -140,7 +140,7 @@ def update(doc = nil, suffix: nil, **options) def index(doc = nil, suffix: nil, **options) if document?(doc) options[:id] = doc.id - options[:body] = doc.source + options[:body] = doc.mutated_source options[:type] = doc.type if doc.type? options[:routing] = doc.routing if doc.routing? end @@ -198,39 +198,97 @@ def bulk(create: nil, delete: nil, index: nil, update: nil, type: nil, suffix: n # @option [String, nil] :suffix The index suffix. Defaults to the nil. # @option [Hash] :context The collection context. This value will be passed as argument to the collection # May be SQL condition or any other filter you have defined on the collection. + # @option [Boolean, Array] :eager_load_lazy_attributes A list of lazy document attributes to include to the bulk index request. + # Or pass `true` to include all lazy attributes. 
+ # @option [Boolean, Array] :update_lazy_attributes A list of lazy document attributes to bulk update after the bulk import. + # Or pass `true` to update all lazy attributes. + # @option [Boolean, Array] :preload_lazy_attributes A list of lazy document attributes to preload using search API before the bulk import. + # Or pass `true` to preload all lazy attributes. # @return [Numeric] The number of documents imported def import(*repo_types, context: {}, eager_load_lazy_attributes: false, update_lazy_attributes: false, preload_lazy_attributes: false, suffix: nil, **options) repo_types = repo_hash.keys if repo_types.empty? count = 0 repo_hash.slice(*repo_types).each do |repo_name, repo| - doc_attrs = {eager: [], lazy: []} - doc_attrs[:eager] = repo.lazy_document_attribute_names(eager_load_lazy_attributes) - doc_attrs[:lazy] = repo.lazy_document_attribute_names(update_lazy_attributes) - doc_attrs[:lazy] -= doc_attrs[:eager] + # Elasticsearch 6.x and older have multiple types per index. + # This gem supports multiple types per index for backward compatibility, but we recommend to update + # your elasticsearch to at least a 7.x version and use a single type per index. + # + # Note that the repository name will be used as the document type. + # mapping_default_type + bulk_kwargs = { suffix: suffix, type: repo_name, **options } + cluster.may_update_type!(bulk_kwargs) + lazy_attrs_to_eager_load = repo.lazy_document_attribute_names(eager_load_lazy_attributes) + lazy_attrs_to_search_preload = repo.lazy_document_attribute_names(preload_lazy_attributes) + lazy_attrs_to_update_after = repo.lazy_document_attribute_names(update_lazy_attributes) + lazy_attrs_to_update_after -= lazy_attrs_to_eager_load + lazy_attrs_to_search_preload -= lazy_attrs_to_eager_load + + # @TODO Refactor this by combining the upcoming code again with repo.each_serialized_batch as it was before: + # context[:lazy_attributes] = lazy_attrs_to_eager_load if lazy_attrs_to_eager_load.any? 
+ # repo.each_serialized_batch(**context) do |batch| + # bulk(**bulk_kwargs, index: batch) + + # lazy_attrs_to_update_after.each do |attr_name| + # partial_docs = repo.documents_for_lazy_attribute(attr_name, batch.reject(&:ignore_on_index?)) + # next if partial_docs.empty? + + # bulk(**bulk_kwargs, update: partial_docs) + # end + # count += batch.size + # end context ||= {} - context[:lazy_attributes] = doc_attrs[:eager] if doc_attrs[:eager].any? - repo.each_serialized_batch(**context) do |batch| - # Elasticsearch 6.x and older have multiple types per index. - # This gem supports multiple types per index for backward compatibility, but we recommend to update - # your elasticsearch to a at least 7.x version and use a single type per index. - # - # Note that the repository name will be used as the document type. - # mapping_default_type - kwargs = { suffix: suffix, type: repo_name, **options } - cluster.may_update_type!(kwargs) + repo.send(:each_batch, **context) do |*args| + batch, collection_context = args + collection_context ||= {} + entries = [*batch].map { |entry| repo.serialize(entry, **collection_context) }.compact + + if lazy_attrs_to_eager_load + attrs = lazy_attrs_to_eager_load.is_a?(Array) ? lazy_attrs_to_eager_load : repo.lazy_document_attribute_names(lazy_attrs_to_eager_load) + attrs.each do |attr_name| + repo.retrieve_lazy_attribute_values(attr_name, entries).each do |doc_header, value| + doc = entries.find { |d| doc_header.id.to_s == d.id.to_s && doc_header.type == d.type && doc_header.routing == d.routing } + doc&.mutate(attr_name) { value } + end + end + end + + preload_search_result = Hash.new { |h, arr_name| h[arr_name] = {} } + if lazy_attrs_to_search_preload.any? 
+ hits = repo.index.search(query: {ids: {values: entries.map(&:id)} }, _source: lazy_attrs_to_search_preload).response.hits + hits.each do |hit| + doc_header = Esse::LazyDocumentHeader.coerce(hit.slice('_id', '_routing')) # TODO Add '_type', when adjusting eql to treat _doc properly + next unless doc_header.valid? + hit.dig('_source')&.each do |attr_name, attr_value| + real_attr_name = repo.lazy_document_attribute_names(attr_name).first + preload_search_result[real_attr_name][doc_header] = attr_value + end + end + preload_search_result.each do |attr_name, values| + values.each do |doc_header, value| + doc = entries.find { |d| doc_header.id.to_s == d.id.to_s && doc_header.type == d.type && doc_header.routing == d.routing } + doc&.mutate(attr_name) { value } + end + end + end + + bulk(**bulk_kwargs, index: entries) - bulk(**kwargs, index: batch) + lazy_attrs_to_update_after.each do |attr_name| + preloaded_ids = preload_search_result[attr_name].keys + filtered_docs = entries.reject do |doc| + doc.ignore_on_index? || preloaded_ids.any? { |d| d.id.to_s == doc.id.to_s && d.type == doc.type && d.routing == doc.routing } + end + next if filtered_docs.empty? - doc_attrs[:lazy].each do |attr_name| - partial_docs = repo.documents_for_lazy_attribute(attr_name, batch.reject(&:ignore_on_index?)) + partial_docs = repo.documents_for_lazy_attribute(attr_name, filtered_docs) next if partial_docs.empty? 
- bulk(**kwargs, update: partial_docs) + bulk(**bulk_kwargs, update: partial_docs) end - count += batch.size + count += entries.size end end count diff --git a/spec/support/shared_examples/repository_documents_import.rb b/spec/support/shared_examples/repository_documents_import.rb index 955041b..587cc3e 100644 --- a/spec/support/shared_examples/repository_documents_import.rb +++ b/spec/support/shared_examples/repository_documents_import.rb @@ -143,6 +143,60 @@ end end + context 'when the preload_lazy_attributes is set' do + it 'search the given lazy document attributes before the bulk import' do + es_client do |client, _conf, cluster| + GeosIndex.create_index(alias: true) + + doc_to_import = GeosIndex::County.documents(conditions: ->(h) { h[:id] == 888 }).first + doc_to_import.mutate(:country) { 'BR' } + GeosIndex.index(doc_to_import, refresh: :wait_for) + + resp = nil + expect { + resp = GeosIndex::County.import(preload_lazy_attributes: %i[country]) + }.not_to raise_error + expect(resp).to eq(total_counties) + + GeosIndex.refresh + expect(GeosIndex.count).to eq(total_counties) + + doc = GeosIndex.get(id: '888') + expect(doc.dig('_source', 'country')).to eq('BR') + + doc = GeosIndex.get(id: '999') + expect(doc.dig('_source', 'country')).to eq(nil) + end + end + end + + context 'when the both preload_lazy_attributes and update_lazy_attributes are set' do + it 'search the given lazy document attributes before the bulk import, and do an additional bulk update for the not preloaded attributes' do + es_client do |client, _conf, cluster| + GeosIndex.create_index(alias: true) + + doc_to_import = GeosIndex::County.documents(conditions: ->(h) { h[:id] == 888 }).first + doc_to_import.mutate(:country) { 'BR' } + GeosIndex.index(doc_to_import, refresh: :wait_for) + + resp = nil + expect { + resp = GeosIndex::County.import(preload_lazy_attributes: %i[country], update_lazy_attributes: %i[country]) + }.not_to raise_error + expect(resp).to eq(total_counties) + + GeosIndex.refresh 
+ expect(GeosIndex.count).to eq(total_counties) + + doc = GeosIndex.get(id: '888') + expect(doc.dig('_source', 'country')).to eq('BR') + + doc = GeosIndex.get(id: '999') + expect(doc.dig('_source', 'country')).to eq('US') + end + end + end + context 'when the document routing is set' do include_context 'with stories index definition' From 50a613d3b7eb824d667c14443f851c9316f18f59 Mon Sep 17 00:00:00 2001 From: "Marcos G. Zimmermann" Date: Fri, 16 Aug 2024 18:35:31 -0300 Subject: [PATCH 3/4] feat: backward compatibility for lazy_update_document_attributes and eager_include_document_attributes --- lib/esse/index/documents.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/esse/index/documents.rb b/lib/esse/index/documents.rb index c64167a..b6dc6dc 100644 --- a/lib/esse/index/documents.rb +++ b/lib/esse/index/documents.rb @@ -209,6 +209,10 @@ def import(*repo_types, context: {}, eager_load_lazy_attributes: false, update_l repo_types = repo_hash.keys if repo_types.empty? count = 0 + # Backward compatibility while I change plugins using it + update_lazy_attributes = options.delete(:lazy_update_document_attributes) if options.key?(:lazy_update_document_attributes) + eager_load_lazy_attributes = options.delete(:eager_include_document_attributes) if options.key?(:eager_include_document_attributes) + repo_hash.slice(*repo_types).each do |repo_name, repo| # Elasticsearch 6.x and older have multiple types per index. # This gem supports multiple types per index for backward compatibility, but we recommend to update From 4366b2236121cf97005532bfcad171eee9a93331 Mon Sep 17 00:00:00 2001 From: "Marcos G. 
Zimmermann" Date: Fri, 16 Aug 2024 18:40:14 -0300 Subject: [PATCH 4/4] chore: update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f7bab1..262d2e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## 0.4.0 - 2024-08-16 * Rename lazy_update_document_attributes to update_lazy_attributes * Rename eager_include_document_attributes to eager_load_lazy_attributes +* Add preload_lazy_attributes option to the import in order to fetch the lazy attributes in a single query before bulk indexing + ## 0.3.6 - 2024-08-07 * Esse::LazyDocumentHeader#to_doc return `Esse::LazyDocumentHeader::Document` instance to properly separate context metadata from document source * Add `.collection_class` method to the `Esse::Repository` class to let external plugins and extensions to access it instead of read @collection_proc variable