Skip to content

Commit

Permalink
feat: add delete_by_query action
Browse files Browse the repository at this point in the history
  • Loading branch information
marcosgz committed Sep 11, 2024
1 parent 0146d4f commit cb839a3
Show file tree
Hide file tree
Showing 11 changed files with 215 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Rename lazy_update_document_attributes to update_lazy_attributes
* Rename eager_include_document_attributes to eager_load_lazy_attributes
* Add preload_lazy_attributes option to the import in order to fetch the lazy attributes in a single query before bulk indexing
* Add `delete_by_query` action to transport and index APIs

## 0.3.6 - 2024-08-07
* Esse::LazyDocumentHeader#to_doc return `Esse::DocumentForPartialUpdate` instance to properly separate context metadata from document source
Expand Down
1 change: 1 addition & 0 deletions lib/esse/events.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,6 @@ module Events
register_event 'elasticsearch.get'
register_event 'elasticsearch.reindex'
register_event 'elasticsearch.update_by_query'
register_event 'elasticsearch.delete_by_query'
end
end
14 changes: 14 additions & 0 deletions lib/esse/index/documents.rb
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,20 @@ def update_by_query(suffix: nil, **options)
cluster.api.update_by_query(**definition)
end

# Deletes the documents of this index that match a query.
#
# The request targets the index name resolved from +suffix+. Every other
# keyword is layered on top of it (so an explicit +:index+ option wins), the
# resulting hash is handed to +cluster.may_update_type!+ and then forwarded
# to the cluster API.
#
# @param suffix [String, nil] The index suffix. Defaults to nil.
# @param options [Hash] Parameters passed along to the elasticsearch request
#
# @return [Hash] The elasticsearch response hash
def delete_by_query(suffix: nil, **options)
  request = { index: index_name(suffix: suffix), **options }
  cluster.may_update_type!(request)
  cluster.api.delete_by_query(**request)
end

protected

def document?(doc)
Expand Down
45 changes: 45 additions & 0 deletions lib/esse/transport/indices.rb
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,51 @@ def update_by_query(index:, **options)
payload[:response] = coerce_exception { client.update_by_query(**opts) }
end
end

# Deletes documents matching the provided query.
#
# @option arguments [List] :index A comma-separated list of index names to search; use `_all` or empty string to perform the operation on all indices
# @option arguments [String] :analyzer The analyzer to use for the query string
# @option arguments [Boolean] :analyze_wildcard Specify whether wildcard and prefix queries should be analyzed (default: false)
# @option arguments [String] :default_operator The default operator for query string query (AND or OR) (options: AND, OR)
# @option arguments [String] :df The field to use as default where no field prefix is given in the query string
# @option arguments [Number] :from Starting offset (default: 0)
# @option arguments [Boolean] :ignore_unavailable Whether specified concrete indices should be ignored when unavailable (missing or closed)
# @option arguments [Boolean] :allow_no_indices Whether to ignore if a wildcard indices expression resolves into no concrete indices. (This includes `_all` string or when no indices have been specified)
# @option arguments [String] :conflicts What to do when the delete by query hits version conflicts? (options: abort, proceed)
# @option arguments [String] :expand_wildcards Whether to expand wildcard expression to concrete indices that are open, closed or both. (options: open, closed, hidden, none, all)
# @option arguments [Boolean] :lenient Specify whether format-based query failures (such as providing text to a numeric field) should be ignored
# @option arguments [String] :preference Specify the node or shard the operation should be performed on (default: random)
# @option arguments [String] :q Query in the Lucene query string syntax
# @option arguments [List] :routing A comma-separated list of specific routing values
# @option arguments [Time] :scroll Specify how long a consistent view of the index should be maintained for scrolled search
# @option arguments [String] :search_type Search operation type (options: query_then_fetch, dfs_query_then_fetch)
# @option arguments [Time] :search_timeout Explicit timeout for each search request. Defaults to no timeout.
# @option arguments [Number] :max_docs Maximum number of documents to process (default: all documents)
# @option arguments [List] :sort A comma-separated list of <field>:<direction> pairs
# @option arguments [Number] :terminate_after The maximum number of documents to collect for each shard, upon reaching which the query execution will terminate early.
# @option arguments [List] :stats Specific 'tag' of the request for logging and statistical purposes
# @option arguments [Boolean] :version Specify whether to return document version as part of a hit
# @option arguments [Boolean] :request_cache Specify if request cache should be used for this request or not, defaults to index level setting
# @option arguments [Boolean] :refresh Should the affected indexes be refreshed?
# @option arguments [Time] :timeout Time each individual bulk request should wait for shards that are unavailable.
# @option arguments [String] :wait_for_active_shards Sets the number of shard copies that must be active before proceeding with the delete by query operation. Defaults to 1, meaning the primary shard only. Set to `all` for all shard copies, otherwise set to any non-negative value less than or equal to the total number of copies for the shard (number of replicas + 1)
# @option arguments [Number] :scroll_size Size on the scroll request powering the delete by query
# @option arguments [Boolean] :wait_for_completion Should the request block until the delete by query is complete.
# @option arguments [Number] :requests_per_second The throttle for this request in sub-requests per second. -1 means no throttle.
# @option arguments [Number|string] :slices The number of slices this task should be divided into. Defaults to 1, meaning the task isn't sliced into subtasks. Can be set to `auto`.
# @option arguments [Hash] :headers Custom HTTP headers
# @option arguments [Hash] :body The search definition using the Query DSL (*Required*)
#
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-delete-by-query.html
def delete_by_query(index:, **options)
  # Refuse to issue a destructive request before anything else happens.
  throw_error_when_readonly!

  # The same hash is exposed on the event payload and sent to the client.
  request = options.merge(index: index)
  Esse::Events.instrument('elasticsearch.delete_by_query') do |payload|
    payload[:request] = request
    payload[:response] = coerce_exception { client.delete_by_query(**request) }
  end
end
end

include InstanceMethods
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# frozen_string_literal: true

require 'spec_helper'
require 'support/shared_examples/transport_delete_by_query'

# Runs the shared 'transport#delete_by_query' examples for the elasticsearch 5.x stack.
# doc_type: true makes the shared examples add `type: 'geo'` to their requests.
stack_describe 'elasticsearch', '5.x', Esse::Transport, '#delete_by_query' do
include_examples 'transport#delete_by_query', doc_type: true
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# frozen_string_literal: true

require 'spec_helper'
require 'support/shared_examples/transport_delete_by_query'

# Runs the shared 'transport#delete_by_query' examples for the elasticsearch 6.x stack.
# doc_type: true makes the shared examples add `type: 'geo'` to their requests.
stack_describe 'elasticsearch', '6.x', Esse::Transport, '#delete_by_query' do
include_examples 'transport#delete_by_query', doc_type: true
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# frozen_string_literal: true

require 'spec_helper'
require 'support/shared_examples/index_documents_delete_by_query'

# Runs the shared 'index.delete_by_query' examples for the elasticsearch 7.x stack,
# using the shared examples' default of doc_type: false (no document type is sent).
stack_describe 'elasticsearch', '7.x', Esse::Index, '.delete_by_query' do
include_examples 'index.delete_by_query'
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# frozen_string_literal: true

require 'spec_helper'
require 'support/shared_examples/transport_delete_by_query'

# Runs the shared 'transport#delete_by_query' examples for the elasticsearch 7.x stack,
# using the shared examples' default of doc_type: false (no document type is sent).
stack_describe 'elasticsearch', '7.x', Esse::Transport, '#delete_by_query' do
include_examples 'transport#delete_by_query'
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# frozen_string_literal: true

require 'spec_helper'
require 'support/shared_examples/index_documents_delete_by_query'

# Runs the shared 'index.delete_by_query' examples for the elasticsearch 8.x stack,
# using the shared examples' default of doc_type: false (no document type is sent).
stack_describe 'elasticsearch', '8.x', Esse::Index, '.delete_by_query' do
include_examples 'index.delete_by_query'
end
61 changes: 61 additions & 0 deletions spec/support/shared_examples/index_documents_delete_by_query.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# frozen_string_literal: true

# Shared examples for `VenuesIndex.delete_by_query`.
#
# Covers the readonly-cluster guard, the error raised when the target index
# is missing, and successful deletion on both aliased and unaliased indices.
# `VenuesIndex` and `total_venues` are presumably supplied by the
# 'with venues index definition' shared context (not visible from this file).
#
# @param doc_type [Boolean] when true, `type: 'venue'` is added to the import
#   and delete_by_query calls.
RSpec.shared_examples 'index.delete_by_query' do |doc_type: false|
  include_context 'with venues index definition'

  let(:params) do
    doc_type ? { type: 'venue' } : {}
  end
  let(:doc_params) do
    doc_type ? { _type: 'venue' } : {}
  end
  let(:index_suffix) { SecureRandom.hex(8) }
  let(:body) { { query: { match_all: {} } } }

  it 'raises an Esse::Transport::ReadonlyClusterError exception when the cluster is readonly' do
    es_client do |client, _conf, cluster|
      cluster.warm_up!
      # No HTTP request may leave the client once the cluster is readonly.
      expect(client).not_to receive(:perform_request)
      cluster.readonly = true
      expect {
        VenuesIndex.delete_by_query(body: body, **params)
      }.to raise_error(Esse::Transport::ReadonlyClusterError)
    end
  end

  # The index is never created here, so the request is expected to fail.
  it 'raises an Esse::Transport::NotFoundError exception when the index does not exist' do
    es_client do |_client, _conf, _cluster|
      expect {
        VenuesIndex.delete_by_query(body: body, **params)
      }.to raise_error(Esse::Transport::NotFoundError)
    end
  end

  it 'deletes the documents in the aliased index' do
    es_client do |_client, _conf, _cluster|
      VenuesIndex.create_index(alias: true, suffix: index_suffix)
      VenuesIndex.import(refresh: true, suffix: index_suffix, **params)

      # No suffix is passed: the request goes through the index alias.
      resp = nil
      expect {
        resp = VenuesIndex.delete_by_query(body: body, **params)
      }.not_to raise_error
      expect(resp['total']).to eq(total_venues)
      expect(resp['deleted']).to eq(total_venues)
    end
  end

  it 'deletes the documents in the unaliased index' do
    es_client do |_client, _conf, _cluster|
      VenuesIndex.create_index(alias: false, suffix: index_suffix)
      VenuesIndex.import(refresh: true, suffix: index_suffix, **params)

      # Without an alias the concrete index must be addressed via its suffix.
      resp = nil
      expect {
        resp = VenuesIndex.delete_by_query(body: body, suffix: index_suffix, **params)
      }.not_to raise_error
      expect(resp['total']).to eq(total_venues)
      expect(resp['deleted']).to eq(total_venues)
    end
  end
end
53 changes: 53 additions & 0 deletions spec/support/shared_examples/transport_delete_by_query.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# frozen_string_literal: true

# Shared examples for `cluster.api.delete_by_query` (transport layer).
#
# Covers the readonly-cluster guard, the error raised for a missing index,
# and a successful delete of a single indexed document.
#
# @param doc_type [Boolean] when true, `type: 'geo'` is added to the index
#   and delete_by_query requests.
RSpec.shared_examples 'transport#delete_by_query' do |doc_type: false|
  let(:params) do
    doc_type ? { type: 'geo' } : {}
  end
  # Index creation body (settings), not the delete-by-query body.
  let(:body) do
    {
      settings: {
        index: {
          number_of_shards: 1,
          number_of_replicas: 0
        }
      }
    }
  end

  it 'raises an Esse::Transport::ReadonlyClusterError exception when the cluster is readonly' do
    es_client do |client, _conf, cluster|
      cluster.warm_up!
      # No HTTP request may leave the client once the cluster is readonly.
      expect(client).not_to receive(:perform_request)
      cluster.readonly = true
      expect {
        cluster.api.delete_by_query(**params, index: "#{cluster.index_prefix}_readonly", body: { query: { match_all: {} } })
      }.to raise_error(Esse::Transport::ReadonlyClusterError)
    end
  end

  it 'raises an Esse::Transport::NotFoundError exception when the index does not exist' do
    es_client do |_client, _conf, cluster|
      expect {
        cluster.api.delete_by_query(**params, index: "#{cluster.index_prefix}_non_existent_index", body: { query: { match_all: {} } })
      }.to raise_error(Esse::Transport::NotFoundError)
    end
  end

  context 'when the index exists' do
    it 'deletes the documents matching the query' do
      es_client do |_client, _conf, cluster|
        index_name = "#{cluster.index_prefix}_delete_by_query"
        cluster.api.create_index(index: index_name, body: body)
        # refresh: true is passed so the document is searchable before the delete runs.
        cluster.api.index(**params, index: index_name, id: 1, body: { title: 'old title' }, refresh: true)

        resp = nil
        expect {
          resp = cluster.api.delete_by_query(**params, index: index_name, body: { query: { match_all: {} } })
        }.not_to raise_error
        expect(resp['total']).to eq(1)
        expect(resp['deleted']).to eq(1)
      end
    end
  end
end

0 comments on commit cb839a3

Please sign in to comment.