diff --git a/CHANGELOG.md b/CHANGELOG.md index b91a649..25b083d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), * Rename lazy_update_document_attributes to update_lazy_attributes * Rename eager_include_document_attributes to eager_load_lazy_attributes * Add preload_lazy_attributes option to the import in order to fetch the lazy attributes in a single query before bulk indexing +* Add `delete_by_query` action to transport and index APIs ## 0.3.6 - 2024-08-07 * Esse::LazyDocumentHeader#to_doc return `Esse::DocumentForPartialUpdate` instance to properly separate context metadata from document source diff --git a/lib/esse/events.rb b/lib/esse/events.rb index 3d443c3..08f5cfc 100644 --- a/lib/esse/events.rb +++ b/lib/esse/events.rb @@ -58,5 +58,6 @@ module Events register_event 'elasticsearch.get' register_event 'elasticsearch.reindex' register_event 'elasticsearch.update_by_query' + register_event 'elasticsearch.delete_by_query' end end diff --git a/lib/esse/index/documents.rb b/lib/esse/index/documents.rb index f611751..485f3c8 100644 --- a/lib/esse/index/documents.rb +++ b/lib/esse/index/documents.rb @@ -315,6 +315,20 @@ def update_by_query(suffix: nil, **options) cluster.api.update_by_query(**definition) end + # Delete documents by query + # + # @param options [Hash] Hash of parameters that will be passed along to elasticsearch request + # @option [String, nil] :suffix The index suffix. Defaults to nil. 
+ # + # @return [Hash] The elasticsearch response hash + def delete_by_query(suffix: nil, **options) + definition = { + index: index_name(suffix: suffix), + }.merge(options) + cluster.may_update_type!(definition) + cluster.api.delete_by_query(**definition) + end + protected def document?(doc) diff --git a/lib/esse/transport/indices.rb b/lib/esse/transport/indices.rb index 2a9f437..4e5e636 100644 --- a/lib/esse/transport/indices.rb +++ b/lib/esse/transport/indices.rb @@ -262,6 +262,51 @@ def update_by_query(index:, **options) payload[:response] = coerce_exception { client.update_by_query(**opts) } end end + + # Deletes documents matching the provided query. + # + # @option arguments [List] :index A comma-separated list of index names to search; use `_all` or empty string to perform the operation on all indices + # @option arguments [String] :analyzer The analyzer to use for the query string + # @option arguments [Boolean] :analyze_wildcard Specify whether wildcard and prefix queries should be analyzed (default: false) + # @option arguments [String] :default_operator The default operator for query string query (AND or OR) (options: AND, OR) + # @option arguments [String] :df The field to use as default where no field prefix is given in the query string + # @option arguments [Number] :from Starting offset (default: 0) + # @option arguments [Boolean] :ignore_unavailable Whether specified concrete indices should be ignored when unavailable (missing or closed) + # @option arguments [Boolean] :allow_no_indices Whether to ignore if a wildcard indices expression resolves into no concrete indices. (This includes `_all` string or when no indices have been specified) + # @option arguments [String] :conflicts What to do when the delete by query hits version conflicts? (options: abort, proceed) + # @option arguments [String] :expand_wildcards Whether to expand wildcard expression to concrete indices that are open, closed or both. 
(options: open, closed, hidden, none, all) + # @option arguments [Boolean] :lenient Specify whether format-based query failures (such as providing text to a numeric field) should be ignored + # @option arguments [String] :preference Specify the node or shard the operation should be performed on (default: random) + # @option arguments [String] :q Query in the Lucene query string syntax + # @option arguments [List] :routing A comma-separated list of specific routing values + # @option arguments [Time] :scroll Specify how long a consistent view of the index should be maintained for scrolled search + # @option arguments [String] :search_type Search operation type (options: query_then_fetch, dfs_query_then_fetch) + # @option arguments [Time] :search_timeout Explicit timeout for each search request. Defaults to no timeout. + # @option arguments [Number] :max_docs Maximum number of documents to process (default: all documents) + # @option arguments [List] :sort A comma-separated list of field:direction pairs + # @option arguments [Number] :terminate_after The maximum number of documents to collect for each shard, upon reaching which the query execution will terminate early. + # @option arguments [List] :stats Specific 'tag' of the request for logging and statistical purposes + # @option arguments [Boolean] :version Specify whether to return document version as part of a hit + # @option arguments [Boolean] :request_cache Specify if request cache should be used for this request or not, defaults to index level setting + # @option arguments [Boolean] :refresh Should the affected indexes be refreshed? + # @option arguments [Time] :timeout Time each individual bulk request should wait for shards that are unavailable. + # @option arguments [String] :wait_for_active_shards Sets the number of shard copies that must be active before proceeding with the delete by query operation. Defaults to 1, meaning the primary shard only. 
Set to `all` for all shard copies, otherwise set to any non-negative value less than or equal to the total number of copies for the shard (number of replicas + 1) + # @option arguments [Number] :scroll_size Size on the scroll request powering the delete by query + # @option arguments [Boolean] :wait_for_completion Should the request block until the delete by query is complete. + # @option arguments [Number] :requests_per_second The throttle for this request in sub-requests per second. -1 means no throttle. + # @option arguments [Number, String] :slices The number of slices this task should be divided into. Defaults to 1, meaning the task isn't sliced into subtasks. Can be set to `auto`. + # @option arguments [Hash] :headers Custom HTTP headers + # @option arguments [Hash] :body The search definition using the Query DSL (*Required*) + # + # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-delete-by-query.html + def delete_by_query(index:, **options) + throw_error_when_readonly! 
+ + Esse::Events.instrument('elasticsearch.delete_by_query') do |payload| + payload[:request] = opts = options.merge(index: index) + payload[:response] = coerce_exception { client.delete_by_query(**opts) } + end + end end include InstanceMethods diff --git a/spec/esse/integrations/elasticsearch-5/transport/documents_delete_by_query_spec.rb b/spec/esse/integrations/elasticsearch-5/transport/documents_delete_by_query_spec.rb new file mode 100644 index 0000000..8e60f14 --- /dev/null +++ b/spec/esse/integrations/elasticsearch-5/transport/documents_delete_by_query_spec.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'support/shared_examples/transport_delete_by_query' + +stack_describe 'elasticsearch', '5.x', Esse::Transport, '#delete_by_query' do + include_examples 'transport#delete_by_query', doc_type: true +end diff --git a/spec/esse/integrations/elasticsearch-6/transport/documents_delete_by_query_spec.rb b/spec/esse/integrations/elasticsearch-6/transport/documents_delete_by_query_spec.rb new file mode 100644 index 0000000..319ee7b --- /dev/null +++ b/spec/esse/integrations/elasticsearch-6/transport/documents_delete_by_query_spec.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'support/shared_examples/transport_delete_by_query' + +stack_describe 'elasticsearch', '6.x', Esse::Transport, '#delete_by_query' do + include_examples 'transport#delete_by_query', doc_type: true +end diff --git a/spec/esse/integrations/elasticsearch-7/index/documents_delete_by_query_spec.rb b/spec/esse/integrations/elasticsearch-7/index/documents_delete_by_query_spec.rb new file mode 100644 index 0000000..85a6882 --- /dev/null +++ b/spec/esse/integrations/elasticsearch-7/index/documents_delete_by_query_spec.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'support/shared_examples/index_documents_delete_by_query' + +stack_describe 'elasticsearch', '7.x', Esse::Index, '.delete_by_query' do + 
include_examples 'index.delete_by_query' +end diff --git a/spec/esse/integrations/elasticsearch-7/transport/documents_delete_by_query_spec.rb b/spec/esse/integrations/elasticsearch-7/transport/documents_delete_by_query_spec.rb new file mode 100644 index 0000000..bed8eee --- /dev/null +++ b/spec/esse/integrations/elasticsearch-7/transport/documents_delete_by_query_spec.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'support/shared_examples/transport_delete_by_query' + +stack_describe 'elasticsearch', '7.x', Esse::Transport, '#delete_by_query' do + include_examples 'transport#delete_by_query' +end diff --git a/spec/esse/integrations/elasticsearch-8/indices/documents_delete_by_query_spec.rb b/spec/esse/integrations/elasticsearch-8/indices/documents_delete_by_query_spec.rb new file mode 100644 index 0000000..df1c568 --- /dev/null +++ b/spec/esse/integrations/elasticsearch-8/indices/documents_delete_by_query_spec.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +require 'spec_helper' +require 'support/shared_examples/index_documents_delete_by_query' + +stack_describe 'elasticsearch', '8.x', Esse::Index, '.delete_by_query' do + include_examples 'index.delete_by_query' +end diff --git a/spec/support/shared_examples/index_documents_delete_by_query.rb b/spec/support/shared_examples/index_documents_delete_by_query.rb new file mode 100644 index 0000000..d72ba09 --- /dev/null +++ b/spec/support/shared_examples/index_documents_delete_by_query.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +RSpec.shared_examples 'index.delete_by_query' do |doc_type: false| + include_context 'with venues index definition' + + let(:params) do + doc_type ? { type: 'venue' } : {} + end + let(:doc_params) do + doc_type ? 
{ _type: 'venue' } : {} + end + let(:index_suffix) { SecureRandom.hex(8) } + let(:body) { { query: { match_all: {} } } } + + it 'raises an Esse::Transport::ReadonlyClusterError exception when the cluster is readonly' do + es_client do |client, _conf, cluster| + cluster.warm_up! + expect(client).not_to receive(:perform_request) + cluster.readonly = true + expect { + VenuesIndex.delete_by_query(body: body, **params) + }.to raise_error(Esse::Transport::ReadonlyClusterError) + end + end + + it 'raises an Esse::Transport::ServerError exception when api throws an error' do + es_client do |client, _conf, cluster| + expect { + VenuesIndex.delete_by_query(body: body, **params) + }.to raise_error(Esse::Transport::NotFoundError) + end + end + + it 'deletes the documents in the aliased index' do + es_client do |client, _conf, cluster| + VenuesIndex.create_index(alias: true, suffix: index_suffix) + VenuesIndex.import(refresh: true, suffix: index_suffix, **params) + + resp = nil + expect { + resp = VenuesIndex.delete_by_query(body: body, **params) + }.not_to raise_error + expect(resp['total']).to eq(total_venues) + expect(resp['deleted']).to eq(total_venues) + end + end + + it 'deletes the documents in the unaliased index' do + es_client do |client, _conf, cluster| + VenuesIndex.create_index(alias: false, suffix: index_suffix) + VenuesIndex.import(refresh: true, suffix: index_suffix, **params) + + resp = nil + expect { + resp = VenuesIndex.delete_by_query(body: body, suffix: index_suffix, **params) + }.not_to raise_error + expect(resp['total']).to eq(total_venues) + expect(resp['deleted']).to eq(total_venues) + end + end +end diff --git a/spec/support/shared_examples/transport_delete_by_query.rb b/spec/support/shared_examples/transport_delete_by_query.rb new file mode 100644 index 0000000..17cc448 --- /dev/null +++ b/spec/support/shared_examples/transport_delete_by_query.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +RSpec.shared_examples 'transport#delete_by_query' do 
|doc_type: false| + let(:params) do + doc_type ? { type: 'geo' } : {} + end + let(:body) do + { + settings: { + index: { + number_of_shards: 1, + number_of_replicas: 0 + } + } + } + end + + it 'raises an Esse::Transport::ReadonlyClusterError exception when the cluster is readonly' do + es_client do |client, _conf, cluster| + cluster.warm_up! + expect(client).not_to receive(:perform_request) + cluster.readonly = true + expect { + cluster.api.delete_by_query(**params, index: "#{cluster.index_prefix}_redonly", body: { q: '*' }) + }.to raise_error(Esse::Transport::ReadonlyClusterError) + end + end + + it 'raises an #