Skip to content

Commit

Permalink
support multiple threads for polldb
Browse files Browse the repository at this point in the history
  • Loading branch information
toluaina committed Mar 12, 2022
1 parent 7f8a181 commit 64de542
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 10 deletions.
2 changes: 1 addition & 1 deletion pgsync/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

__author__ = "Tolu Aina"
__email__ = "[email protected]"
__version__ = "2.2.0"
__version__ = "2.2.1"
5 changes: 5 additions & 0 deletions pgsync/elastichelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ def _bulk(
)
raise_on_error: bool = raise_on_error or ELASTICSEARCH_RAISE_ON_ERROR

# when using multiple threads for poll_db we need to account for other
# threads performing deletions
ignore_status: Tuple[int] = (400, 404)

if ELASTICSEARCH_STREAMING_BULK:
for _ in helpers.streaming_bulk(
self.__es,
Expand Down Expand Up @@ -178,6 +182,7 @@ def _bulk(
refresh=refresh,
raise_on_exception=raise_on_exception,
raise_on_error=raise_on_error,
ignore_status=ignore_status,
):
self.doc_count += 1

Expand Down
8 changes: 4 additions & 4 deletions pgsync/redisqueue.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ def __init__(self, name: str, namespace: str = "queue", **kwargs):
except ConnectionError as e:
logger.exception(f"Redis server is not running: {e}")
raise
self._pipeline = self.__db.pipeline()

@property
def qsize(self) -> int:
Expand All @@ -38,9 +37,10 @@ def qsize(self) -> int:
def bulk_pop(self, chunk_size: Optional[int] = None) -> List[dict]:
"""Remove and return multiple items from the queue."""
chunk_size: int = chunk_size or REDIS_READ_CHUNK_SIZE
self._pipeline.lrange(self.key, 0, chunk_size - 1)
self._pipeline.ltrim(self.key, chunk_size, -1)
items: List[List[bytes], bool] = self._pipeline.execute()
pipeline = self.__db.pipeline()
pipeline.lrange(self.key, 0, chunk_size - 1)
pipeline.ltrim(self.key, chunk_size, -1)
items: List[List[bytes], bool] = pipeline.execute()
logger.debug(f"bulk_pop nsize: {len(items[0])}")
return list(map(lambda value: json.loads(value), items[0]))

Expand Down
2 changes: 2 additions & 0 deletions pgsync/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
# page block size
BLOCK_SIZE = env.int("BLOCK_SIZE", default=2048 * 10)
QUERY_LITERAL_BINDS = env.bool("QUERY_LITERAL_BINDS", default=None)
# number of threads to spawn for poll db
NTHREADS_POLLDB = env.int("NTHREADS_POLLDB", default=1)

# Elasticsearch:
ELASTICSEARCH_SCHEME = env.str("ELASTICSEARCH_SCHEME", default="http")
Expand Down
17 changes: 14 additions & 3 deletions pgsync/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from .settings import (
CHECKPOINT_PATH,
LOG_INTERVAL,
NTHREADS_POLLDB,
POLL_TIMEOUT,
REDIS_POLL_INTERVAL,
REDIS_WRITE_CHUNK_SIZE,
Expand Down Expand Up @@ -1107,7 +1108,7 @@ def status(self):
sys.stdout.flush()
time.sleep(LOG_INTERVAL)

def receive(self) -> None:
def receive(self, nthreads_polldb=None) -> None:
"""
Receive events from db.
Expand All @@ -1119,7 +1120,9 @@ def receive(self) -> None:
"""
# start one or more background worker producer threads (nthreads_polldb of
# them) to poll the db and populate the Redis cache
self.poll_db()
nthreads_polldb = nthreads_polldb or NTHREADS_POLLDB
for _ in range(nthreads_polldb):
self.poll_db()

# sync up to current transaction_id
self.pull()
Expand Down Expand Up @@ -1187,6 +1190,13 @@ def receive(self) -> None:
default=False,
help="Analyse database",
)
@click.option(
"--nthreads_polldb",
"-n",
help="Number of threads to spawn for poll db",
type=int,
default=NTHREADS_POLLDB,
)
def main(
config,
daemon,
Expand All @@ -1199,6 +1209,7 @@ def main(
verbose,
version,
analyze,
nthreads_polldb,
):
"""Main application syncer."""
if version:
Expand Down Expand Up @@ -1237,7 +1248,7 @@ def main(
sync: Sync = Sync(document, verbose=verbose, **kwargs)
sync.pull()
if daemon:
sync.receive()
sync.receive(nthreads_polldb)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 2.1.11
current_version = 2.2.0
commit = True
tag = True

Expand Down
1 change: 0 additions & 1 deletion tests/test_redisqueue.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ def test_redis_conn(self, mock_logger, mocker):
mock_get_redis_url.assert_called_once()
mock_ping.assert_called_once()
mock_logger.exception.assert_not_called()
assert queue._pipeline is not None

@patch("pgsync.redisqueue.logger")
def test_redis_conn_fail(self, mock_logger, mocker):
Expand Down

0 comments on commit 64de542

Please sign in to comment.