
Commit

stats: add docstrings
zzacharo committed Oct 15, 2024
1 parent da42040 commit bb868f0
Showing 9 changed files with 57 additions and 37 deletions.
8 changes: 3 additions & 5 deletions cds_migrator_kit/rdm/migration/stats/event_generator.py
@@ -5,9 +5,7 @@


def process_download_event(entry, rec_context):
"""Format
Entry from legacy stat events:
"""Entry from legacy stat events format.
{
"id_bibrec": 2884810,
@@ -96,9 +94,8 @@ def process_download_event(entry, rec_context):


def process_pageview_event(entry, rec_context):
"""Format
"""Entry from legacy stat events format.
Entry from legacy stat events:
{
"_index": "cds-2023",
"_id": "AYy3LvO8Bd18JHv_G38-",
@@ -167,6 +164,7 @@ def process_pageview_event(entry, rec_context):


def prepare_new_doc(data, rec_context, logger, doc_type):
"""Produce a new statistic event for the destination cluster."""
for doc in data["hits"]["hits"]:
try:
new_doc = deepcopy(doc)
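For context, prepare_new_doc walks an OpenSearch search response. A minimal sketch of the input shape it expects, inferred from the loop over data["hits"]["hits"] above and the run.py excerpt below; the full field set of each hit is the legacy event described in the docstrings and is omitted here:

# Hypothetical minimal input for prepare_new_doc (assumption, for illustration only).
data = {
    "hits": {
        "total": {"value": 1},
        "hits": [
            {
                "_index": "cds-2023",
                "_id": "AYy3LvO8Bd18JHv_G38-",
                "_source": {"id_bibrec": 2884810},
            }
        ],
    }
}

# rec_context, logger and doc_type come from the migration run (see run.py);
# the allowed doc_type values are not shown in this diff.
# new_docs = prepare_new_doc(data, rec_context, logger, doc_type="...")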
1 change: 1 addition & 0 deletions cds_migrator_kit/rdm/migration/stats/log.py
@@ -8,6 +8,7 @@


def setup_logger(name, filename, level=logging.INFO):
"""Setup statistics migration logger."""
logger = logging.getLogger(name)
logger.setLevel(level)

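A small usage sketch for the newly documented setup_logger, assuming it attaches a file handler for the given filename (only the signature and the first two lines of the body are visible here):

import logging

from cds_migrator_kit.rdm.migration.stats.log import setup_logger

# run.py builds logger names as "{index}-{type}-logger"; the filename is illustrative.
logger = setup_logger(
    "cds-2023-pageviews-logger",
    "pageviews-migration.log",
    level=logging.INFO,
)
logger.info("Statistics migration started")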
8 changes: 4 additions & 4 deletions cds_migrator_kit/rdm/migration/stats/run.py
@@ -19,7 +19,7 @@
from .search import src_os_client, dest_os_client, os_search, os_scroll


def generate_new_events(os_client, data, rec_context, logger, doc_type, dry_run=True):
def _generate_new_events(os_client, data, rec_context, logger, doc_type, dry_run=True):
try:
new_docs = prepare_new_doc(data, rec_context, logger, doc_type)
if dry_run:
Expand All @@ -31,7 +31,7 @@ def generate_new_events(os_client, data, rec_context, logger, doc_type, dry_run=
logger.error(ex)


def run_process(index, t, recid, rec_context, dry_run=True):
def _run_process(index, t, recid, rec_context, dry_run=True):
logger = logging.getLogger("{0}-{1}-logger".format(index, t))
if not logger.handlers:
# Avoid adding multiple handlers
Expand All @@ -49,7 +49,7 @@ def run_process(index, t, recid, rec_context, dry_run=True):
scroll_size = len(data["hits"]["hits"])
total = data["hits"]["total"]["value"]
logger.info("Total number of results for id: {0} <{1}>".format(total, recid))
generate_new_events(
_generate_new_events(
dest_os_client, data, rec_context, logger, doc_type=t, dry_run=dry_run
)
tot_chunks = total // SRC_SEARCH_SIZE
Expand All @@ -72,7 +72,7 @@ def run_process(index, t, recid, rec_context, dry_run=True):
if total == 0:
continue

generate_new_events(
_generate_new_events(
dest_os_client,
data,
rec_context,
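The _run_process flow above pages through the legacy events with an initial search followed by scroll requests (os_search, os_scroll, SRC_SEARCH_SIZE). A rough sketch of that pattern with a bare opensearch-py client, as an illustration only; the helpers in .search wrap this and their exact signatures are not part of this diff:

from opensearchpy import OpenSearch

SRC_SEARCH_SIZE = 1000  # illustrative; the real value comes from the migration config
client = OpenSearch(hosts=["https://legacy-cluster:9200"])  # placeholder host

# Initial search opens a scroll context, sized like SRC_SEARCH_SIZE.
data = client.search(
    index="cds-2023",
    body={"query": {"term": {"id_bibrec": 2884810}}},
    scroll="2m",
    size=SRC_SEARCH_SIZE,
)
total = data["hits"]["total"]["value"]
scroll_id = data["_scroll_id"]
print("Total number of results: {0}".format(total))

# Keep scrolling until a page comes back empty, mirroring the chunked loop in run.py.
while data["hits"]["hits"]:
    for doc in data["hits"]["hits"]:
        pass  # each doc would be transformed (prepare_new_doc) and indexed on the destination
    data = client.scroll(scroll_id=scroll_id, scroll="2m")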
4 changes: 3 additions & 1 deletion cds_migrator_kit/rdm/migration/transform/transform.py
@@ -41,7 +41,7 @@


class CDSToRDMRecordEntry(RDMRecordEntry):
"""Transform Zenodo record to RDM record."""
"""Transform CDS record to RDM record."""

def __init__(
self,
Expand All @@ -50,6 +50,7 @@ def __init__(
missing_users_filename="people.csv",
dry_run=False,
):
"""Constructor."""
self.missing_users_dir = missing_users_dir
self.missing_users_filename = missing_users_filename
self.dry_run = dry_run
@@ -450,6 +451,7 @@ def _record_files(self, entry, record):
return []

def run(self, entries):
"""Run transformation step."""
return super().run(entries)

#
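A brief sketch of driving CDSToRDMRecordEntry, assuming the constructor arguments visible above (missing_users_dir is inferred from the assignment in the body); the shape of the legacy entries is not part of this diff:

from cds_migrator_kit.rdm.migration.transform.transform import CDSToRDMRecordEntry

# dry_run=True keeps the transform from touching persistent state;
# the paths below are placeholders.
entry_transformer = CDSToRDMRecordEntry(
    missing_users_dir="/tmp/missing_users",
    missing_users_filename="people.csv",
    dry_run=True,
)

# legacy_entries would be an iterable of dumped CDS records (not shown here).
# rdm_entries = entry_transformer.run(legacy_entries)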
37 changes: 21 additions & 16 deletions cds_migrator_kit/rdm/migration/transform/users.py
@@ -25,6 +25,7 @@


class CDSUserEntry(Entry):
"""Transform CDS user record to RDM user record."""

def _email(self, entry):
"""Returns the email."""
@@ -73,34 +74,40 @@ def _transform(self, entry):


class CDSUserIntermediaryLoad(Load):
"""CDS user intermediate load class."""

def __init__(self, filepath, **kwargs):
"""Constructor."""
self.filepath = filepath
self.dumpfile = open(self.filepath, 'w', newline='')
fieldnames = ['email', 'person_id', "surname", "given_names", "department"]
self.dumpfile = open(self.filepath, "w", newline="")
fieldnames = ["email", "person_id", "surname", "given_names", "department"]
self.writer = csv.DictWriter(self.dumpfile, fieldnames=fieldnames)
self.writer.writeheader()

def _load(self, entry, *args, **kwargs):
self.writer.writerow({'email': entry["email"],
'person_id': entry["person_id"],
"surname": entry['surname'].upper(),
"given_names": entry['given_names'],
"department": entry['department']
})
self.writer.writerow(
{
"email": entry["email"],
"person_id": entry["person_id"],
"surname": entry["surname"].upper(),
"given_names": entry["given_names"],
"department": entry["department"],
}
)

def _cleanup(self): # pragma: no cover
"""Cleanup data after loading."""
pass


class CDSMissingUserLoad:
"""CDS missing user load class."""

def __init__(self, remote_account_client_id=None):
"""Constructor."""
self.client_id = current_app.config["CERN_APP_CREDENTIALS"][
"consumer_key"
]
self.client_id = current_app.config["CERN_APP_CREDENTIALS"]["consumer_key"]

def create_invenio_user(self, email,username):
def create_invenio_user(self, email, username):
"""Commit new user in db."""
try:
user = User(email=email, username=username, active=False)
@@ -130,16 +137,14 @@ def create_invenio_user_profile(self, user, name):

def create_invenio_remote_account(self, user_id, extra_data=None):
"""Return new user entry."""

if extra_data is None:
extra_data = {}
return RemoteAccount.create(
client_id=self.client_id,
user_id=user_id,
extra_data=extra_data
client_id=self.client_id, user_id=user_id, extra_data=extra_data
)

def create_user(self, email, name, person_id, username, extra_data=None):
"""Create an invenio user."""
user = self.create_invenio_user(email, username)
user_id = user.id

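As a usage sketch, CDSMissingUserLoad.create_user chains the helpers above (user, profile, remote account). The values below are placeholders and assume a Flask application context with CERN_APP_CREDENTIALS configured, as the constructor requires:

from cds_migrator_kit.rdm.migration.transform.users import CDSMissingUserLoad

loader = CDSMissingUserLoad()  # reads CERN_APP_CREDENTIALS["consumer_key"] from the app config

# Placeholder data; extra_data ends up on the created RemoteAccount.
loader.create_user(
    email="jane.doe@cern.ch",
    name="Jane Doe",
    person_id="123456",
    username="jdoe",
    extra_data={"migration": {"source": "cds-legacy"}},
)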
@@ -81,11 +81,12 @@ class ManualImportRequired(CDSMigrationException):


class RestrictedFileDetected(CDSMigrationException):
"""Record has restricted files record."""

description = "[Restricted file detected]"


class RecordStatsNotImported(CDSMigrationException):
"""The corresponding field should be manually migrated."""
"""Record statistics error."""

description = "[RECORD STATS NOT IMPORTED]"
@@ -18,8 +18,16 @@


class MarcValue(ABC):
def __init__(self, raw_value, required_type, subfield=None, required=False,
default_value=None):
"""Abstract class for Marc value."""

def __init__(
self,
raw_value,
required_type,
subfield=None,
required=False,
default_value=None,
):
"""Constructor."""
if subfield:
self.raw_value = raw_value.get(subfield)
Expand All @@ -39,9 +47,11 @@ def type(self):

def required(self):
"""Check if value present if required."""
if ((
not self.raw_value or not self.parsed_value)
and self.is_required and not self.default_value):
if (
(not self.raw_value or not self.parsed_value)
and self.is_required
and not self.default_value
):
raise MissingRequiredField(subfield=self.subfield, value=self.raw_value)
return self.is_required

Expand All @@ -59,14 +69,14 @@ def parse(self):
self.parsed_value = self._clean()
return self


class StringValue(MarcValue):

def _clean(self):
return self.raw_value.strip()

def filter_regex(self, regex):
return re.sub(regex, '', self.parsed_value, flags=re.UNICODE)

return re.sub(regex, "", self.parsed_value, flags=re.UNICODE)


class ListValue(MarcValue):
@@ -1,4 +1,3 @@
"""Migration regex patterns."""



ALPHANUMERIC_ONLY = r'[^\w\s]'
ALPHANUMERIC_ONLY = r"[^\w\s]"
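To show how the reformatted StringValue and the ALPHANUMERIC_ONLY pattern fit together, a small sketch; the real call sites live in the transform rules and are not part of this commit, so the StringValue lines are indicative only:

import re

ALPHANUMERIC_ONLY = r"[^\w\s]"

# StringValue._clean() strips whitespace, parse() stores the result, and
# filter_regex() drops anything matching the given pattern, e.g.:
#   value = StringValue({"a": "  Conference proceedings (2023)!  "}, str, subfield="a").parse()
#   title = value.filter_regex(ALPHANUMERIC_ONLY)

# Equivalent standalone behaviour, for illustration:
raw = "  Conference proceedings (2023)!  "
title = re.sub(ALPHANUMERIC_ONLY, "", raw.strip(), flags=re.UNICODE)
print(title)  # Conference proceedings 2023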
4 changes: 4 additions & 0 deletions cds_migrator_kit/records/log.py
@@ -95,6 +95,7 @@ def __init__(self, stats_filename, records_filename, records_state_filename):
self.record_dump_file = None

def start_log(self):
"""Initialize logging file descriptors."""
# init log files
self.error_file = open(self.STAT_FILEPATH, "w")
self.record_dump_file = open(self.RECORD_FILEPATH, "w")
Expand All @@ -119,6 +120,7 @@ def start_log(self):
self.records_state_dump_file.write("[\n")

def read_log(self):
"""Read error log file."""
self.error_file = open(self.STAT_FILEPATH, "r")
reader = csv.DictReader(self.error_file)
for row in reader:
Expand All @@ -130,6 +132,7 @@ def load_record_dumps(self):
return json.load(self.record_dump_file)

def finalise(self):
"""Finalise logging files."""
self.error_file.close()
self.record_dump_file.write("}")
self.record_dump_file.close()
@@ -172,6 +175,7 @@ def add_log(self, exc, record=None, key=None, value=None):
logger_migrator.error(exc)

def add_success(self, recid):
"""Log recid as success."""
self.log_writer.writerow({"recid": recid, "clean": True})


