Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions site/cds_rdm/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@
# the terms of the GPL-2.0 License; see LICENSE file for more details.

"""CDS-RDM module."""
from datetime import datetime

from flask import current_app
from invenio_jobs.logging.jobs import ContextAwareOSHandler, job_context
from invenio_jobs.services import JobLogEntrySchema
from invenio_search import current_search_client
from invenio_search.utils import prefix_index

from cds_rdm.clc_sync.resources.config import CLCSyncResourceConfig
from cds_rdm.clc_sync.resources.resource import CLCSyncResource
from cds_rdm.clc_sync.resources.utils import get_clc_sync_entry
Expand All @@ -21,6 +29,44 @@
from .views import get_linked_records_search_query


class CDSContextAwareOSHandler(ContextAwareOSHandler):
"""Job log handler that preserves selected structured fields."""

_extra_fields = (
"entry_id",
"inspire_id",
"record_pid",
"report_group_key",
"report_kind",
"report_reason",
"skip_sentry",
)

def enrich_log(self, record):
"""Enrich log record with context and supported extra fields."""
context = dict(job_context.get())
log_data = {
"timestamp": datetime.now().isoformat(),
"level": record.levelname,
"message": record.getMessage(),
"module": record.module,
"function": record.funcName,
"line": record.lineno,
"context": context,
}
serialized_data = JobLogEntrySchema().load(log_data)
for field_name in self._extra_fields:
value = getattr(record, field_name, None)
if value is not None:
serialized_data[field_name] = value
return serialized_data

def index_in_os(self, log_data):
"""Send log data to OpenSearch."""
full_index_name = prefix_index(current_app.config["JOBS_LOGGING_INDEX"])
current_search_client.index(index=full_index_name, body=log_data)


class CDS_RDM_App(object):
"""CDS-RDM App."""

Expand All @@ -39,6 +85,7 @@ def init_app(self, app):
"""Flask application initialization."""
self.init_services(app)
self.init_resources(app)
self.init_job_logging(app)
app.jinja_env.globals["get_clc_sync_entry"] = get_clc_sync_entry
app.jinja_env.globals["evaluate_permissions"] = evaluate_permissions
# Register filter for building linked records search query
Expand All @@ -61,6 +108,18 @@ def init_resources(self, app):
config=HarvesterDownloadResourceConfig,
)

def init_job_logging(self, app):
"""Replace the default jobs log handler with the CDS-aware variant."""
handlers = list(app.logger.handlers)
for handler in handlers:
if type(handler) is ContextAwareOSHandler:
app.logger.removeHandler(handler)

if app.config.get("JOBS_LOGGING"):
os_handler = CDSContextAwareOSHandler()
os_handler.setLevel(app.config["JOBS_LOGGING_LEVEL"])
app.logger.addHandler(os_handler)


class CDS_RDM_UI(object):
"""CDS-RDM extension."""
Expand Down
13 changes: 10 additions & 3 deletions site/cds_rdm/harvester_download/resources/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from cds_rdm.harvester_runs.logs import (
HarvesterRunError,
fetch_harvester_run_logs,
lines_from_hits,
group_log_hits,
plain_text_log,
resolve_harvester_run,
)
Expand Down Expand Up @@ -48,8 +48,15 @@ def download(self):
raise self._http_json_error(error.message, error.code)

hits, total = fetch_harvester_run_logs(run)
lines, error_count, warning_count = lines_from_hits(hits)
logs = plain_text_log(run, lines, total, error_count, warning_count)
grouped_issues, other_lines, error_count, warning_count = group_log_hits(hits)
logs = plain_text_log(
run,
grouped_issues,
other_lines,
total,
error_count,
warning_count,
)

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"harvester_logs_{run.id}_{timestamp}.log"
Expand Down
Loading
Loading