Skip to content

Commit

Permalink
log: add record state and reorganize load calls
Browse files Browse the repository at this point in the history
  • Loading branch information
zzacharo committed Oct 15, 2024
1 parent b8d9526 commit da42040
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 32 deletions.
36 changes: 15 additions & 21 deletions cds_migrator_kit/rdm/migration/load/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,10 @@ def _load_communities(self, draft, entry):
parent.communities.add(community)
parent.commit()

def _load_versions(self, draft, entry):
def _load_versions(self, draft, entry, logger):
"""Load other versions of the record."""
draft_files = entry["draft_files"]
legacy_recid = entry["record"]["recid"]

def publish_and_mint_recid(draft, version):
record = current_rdm_records_service.publish(system_identity, draft["id"])
Expand All @@ -154,9 +155,7 @@ def publish_and_mint_recid(draft, version):
record._record.commit()
# it seems more intuitive if we mint the lrecid for parent
# but then we get a double redirection
legacy_recid_minter(
entry["record"]["recid"], record._record.parent.model.id
)
legacy_recid_minter(legacy_recid, record._record.parent.model.id)
return record

if not draft_files:
Expand Down Expand Up @@ -197,17 +196,18 @@ def publish_and_mint_recid(draft, version):

record = publish_and_mint_recid(draft, version)
records.append(record._record)
return records

if records:
record_state_context = self._load_record_state(legacy_recid, records)
# Dump the computed record state. This is useful to migrate then the record stats
if record_state_context:
logger.add_record_state(record_state_context)

def _load_model_fields(self, draft, entry):
"""Load model fields of the record."""

draft._record.model.created = arrow.get(entry["record"]["created"]).datetime
draft._record.model.updated = arrow.get(entry["record"]["created"]).datetime
draft._record.commit()
# TODO we can use unit of work when it is moved to invenio-db module
self._load_access(draft, entry)
self._load_communities(draft, entry)
db.session.commit()

def _dry_load(self, entry):
current_rdm_records_service.schema.load(
Expand Down Expand Up @@ -300,19 +300,13 @@ def _load(self, entry):
draft = current_rdm_records_service.create(
identity, data=entry["record"]["json"]
)

self._load_model_fields(draft, entry)
# TODO we can use unit of work when it is moved to invenio-db module
self._load_access(draft, entry)
self._load_communities(draft, entry)
db.session.commit()
self._load_versions(draft, entry, migration_logger)

records = self._load_versions(draft, entry)

if records:
legacy_recid = entry["record"]["recid"]
record_state_context = self._load_record_state(
legacy_recid, records
)
# Dump the computed record state. This is useful to migrate then the record stats
if record_state_context:
migration_logger.add_record_state(record_state_context)
migration_logger.add_success(recid)
except Exception as e:
exc = ManualImportRequired(
Expand Down
2 changes: 1 addition & 1 deletion cds_migrator_kit/rdm/migration/streams.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ records:
transform:
files_dump_dir: cds_migrator_kit/rdm/migration/data/summer_student_reports/files/
missing_users: cds_migrator_kit/rdm/migration/data/users
community_slug: 23cca474-1edb-42aa-a613-c79c55357a4c
community_id: 69c22055-7249-4115-b1c1-0904a04877ca
8 changes: 4 additions & 4 deletions cds_migrator_kit/rdm/migration/transform/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,21 +331,21 @@ def __init__(
throw=True,
files_dump_dir=None,
missing_users=None,
community_slug=None,
community_id=None,
dry_run=False,
):
"""Constructor."""
self.files_dump_dir = Path(files_dump_dir).absolute().as_posix()
self.missing_users_dir = Path(missing_users).absolute().as_posix()
self.community_slug = community_slug
self.community_id = community_id
self.dry_run = dry_run
super().__init__(workers, throw)

def _community_id(self, entry, record):
communities = record.get("communities", [])
communities = [self.community_slug] + [slug for slug in communities]
communities = [self.community_id] + [slug for slug in communities]
if communities:
return {"ids": communities, "default": self.community_slug}
return {"ids": communities, "default": self.community_id}
return {}

def _parent(self, entry, record):
Expand Down
13 changes: 7 additions & 6 deletions cds_migrator_kit/records/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,10 @@ def start_log(self):
# init log files
self.error_file = open(self.STAT_FILEPATH, "w")
self.record_dump_file = open(self.RECORD_FILEPATH, "w")
self.records_state_dump_file = open(self.RECORD_STATE_FILEPATH, "w")
self.error_file.truncate(0)
self.record_dump_file.truncate(0)
self.records_state_dump_file.truncate(0)
columns = [
"recid",
"stage",
Expand All @@ -114,6 +116,7 @@ def start_log(self):
self.log_writer = csv.DictWriter(self.error_file, fieldnames=columns)
self.log_writer.writeheader()
self.record_dump_file.write("{\n")
self.records_state_dump_file.write("[\n")

def read_log(self):
self.error_file = open(self.STAT_FILEPATH, "r")
Expand All @@ -130,15 +133,17 @@ def finalise(self):
self.error_file.close()
self.record_dump_file.write("}")
self.record_dump_file.close()
self.records_state_dump_file.write("]")
self.records_state_dump_file.close()

def add_record(self, record, **kwargs):
"""Add record to list of collected records."""
recid = record["legacy_recid"]
self.record_dump_file.write(f'"{recid}": {json.dumps(record)},\n')

def add_record_state(self, record, **kwargs):
def add_record_state(self, record_state, **kwargs):
"""Add record state."""
pass
self.records_state_dump_file.write(f"{json.dumps(record_state)},\n")

def add_log(self, exc, record=None, key=None, value=None):
"""Add exception log."""
Expand Down Expand Up @@ -180,7 +185,3 @@ def __init__(self):
"rdm_records_dump.json",
"rdm_records_state.json",
)

def add_record_state(self, record_state):
"""Add record state."""
self.records_state.append(record_state)

0 comments on commit da42040

Please sign in to comment.