Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions cds_migrator_kit/rdm/records/transform/models/eco.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2026 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""CDS-RDM ECO model."""

from cds_migrator_kit.rdm.records.transform.models.base_record import (
rdm_base_record_model,
)
from cds_migrator_kit.transform.overdo import CdsOverdo


class ECOModel(CdsOverdo):
    """Translation model used to select and transform ECO records."""

    # Search query selecting the records handled by this model:
    #   * 980__:POSTER records,
    #   * 980__:BROCHURE records tagged 690C_ CERNOFFICIALPRESSBROCHURE or
    #     CERNEXPERIMENTBROCHURE,
    #   * 980__:CMSOUTREACH records carrying a "Brochure"/"brochure" keyword
    #     (6531_, queried both as `6531_.a` and `6531_a`),
    #   * 980__:NOTE records with 710__.5:IR,
    # minus anything matching 595__a:Press, 980__:LHCb_Misc or
    # 690C_a:PRIVATLAS.
    # The query text is reproduced verbatim, including its line layout.
    __query__ = """
(
980__:POSTER
OR (980__:BROCHURE AND 690C_:CERNOFFICIALPRESSBROCHURE)
OR (
(980__:BROCHURE AND 690C_:CERNEXPERIMENTBROCHURE)
OR (
980__:CMSOUTREACH
AND (
6531_.a:Brochure
OR 6531_.a:brochure
OR 6531_a:Brochure
OR 6531_a:brochure
)
)
)
OR (980__:NOTE AND 710__.5:IR)
)
AND -595__a:Press
AND -980__:LHCb_Misc
AND -690C_a:PRIVATLAS
"""

    # Field/subfield keys listed as ignorable for this model.
    # NOTE(review): how these keys are skipped is decided by CdsOverdo, which
    # is not visible here.
    __ignore_keys__ = {
        # -- identifiers and harvesting ----------------------------------
        # source of pid, only value: OSTI, 2948638, 2853279
        "0247_9",
        "0248_a",
        "0248_p",
        "0248_q",
        "035__d",  # OAI harvest tag
        "035__h",  # OAI harvest tag
        "035__m",  # OAI harvest tag
        # -- people, titles and contacts ---------------------------------
        "100__m",  # contributor e-mail
        "245__9",  # title source, only value: submitter
        # contact person e-mail
        # TODO: is it okay to ignore? example: 2908973
        "270__m",
        # contact person name
        # TODO: is it okay to ignore?
        "270__p",
        # -- physical description and provenance -------------------------
        "300__a",  # number of pages
        "340__a",  # physical medium
        "520__9",  # abstract provenance
        # original source poster
        # https://cds.cern.ch/record/2695195/export/hm
        "541__e",
        "594__a",  # PUB: 2749806, 2749822
        "6531_9",  # keyword scheme
        "700__m",  # contributor e-mail
        # -- related link details ----------------------------------------
        # display name of the related link
        # TODO: is it okay to ignore?
        "773__p",
        # year
        # TODO: is it okay to ignore?
        # https://cds.cern.ch/record/1452204/export/xm
        "773__y",
        # TODO: is it okay to ignore?
        # https://cds.cern.ch/record/1452204/export/xm
        "773__v",
        "852__c",
        "852__h",
        # -- files ---------------------------------------------------------
        "8560_f",  # contact e-mail
        "8564_8",  # file id
        "8564_s",  # bibdoc id
        "8564_x",  # icon thumbnail sizes
        "8564_y",  # file description, handled by the files dump
        "8564_z",  # DM metadata
        # -- modification bookkeeping -------------------------------------
        "937__c",  # last modified by
        "937__s",  # last modification date
        "960__a",  # base number
        "961__a",  # CDS modification tag
        "961__b",  # CDS modification tag
        "961__c",  # CDS modification tag
        "961__h",  # CDS modification tag
        "961__l",  # CDS modification tag
        "961__x",  # CDS modification tag
        "981__a",  # duplicate record id
    }

    # Default field values for this model; the only non-empty default is a
    # single organizational creator named "CERN".
    # NOTE(review): presumably applied when a record does not provide these
    # fields itself; confirm against CdsOverdo.
    _default_fields = {
        "custom_fields": {},
        "languages": [],
        "related_identifiers": [],
        "creators": [
            {
                "person_or_org": {
                    "type": "organizational",
                    "name": "CERN",
                },
            },
        ],
    }


# Module-level ECO model instance. It is constructed with the shared RDM base
# record model as its only base and with the ECO rules entry point group.
# NOTE(review): presumably CdsOverdo loads the ECO-specific rules from this
# entry point group; confirm against CdsOverdo.
eco_model = ECOModel(
    bases=(rdm_base_record_model,),
    entry_point_group="cds_migrator_kit.migrator.rules.eco",
)
2 changes: 2 additions & 0 deletions cds_migrator_kit/rdm/records/transform/models/it.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ class ITModel(CdsOverdo):
-980__:BOOK
-690C_:YELLOWREPORT
-690C_:"YELLOW REPORT"
-690C_:CERNOFFICIALPRESSBROCHURE
-690C_:CERNEXPERIMENTBROCHURE
-980__:THESIS
-980__:INTNOTECMSPUBL
"""
Expand Down
2 changes: 1 addition & 1 deletion cds_migrator_kit/rdm/records/transform/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ def field_experiments(record_json, custom_fields_dict):
"cern:experiments", []
)
for experiment in experiments:
if experiment.lower().strip() == "not applicable":
if experiment.lower().strip() in ["not applicable", "select:"]:
continue
result = search_vocabulary(experiment, "experiments")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def created(self, key, value):
source = clean_val("s", value, str)
# h = human catalogued
# n = script catalogued or via submission
if source not in ["n", "h", "m", "r"]:
raise UnexpectedValue(subfield="s", field=key, value=value)
if source not in ["n", "h", "m", "r", "d"]:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does `d` mean?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

311 records have `d` in the source field. I checked but couldn't find the meaning of `d`. Maybe digitized? I'll add a question to the curation sheet.
Some example recids: 43247, 43430, 824753, 1221556

raise UnexpectedValue(subfield="s", field=key, value=source)
date_values = value.get("w")
if not date_values or not date_values[0]:
return datetime.date.today().isoformat()
Expand Down Expand Up @@ -797,6 +797,10 @@ def related_identifiers_787(self, key, value):
"relation_type": {"id": "references"},
"resource_type": {"id": "publication-conferencepaper"},
},
"paper": {
"relation_type": {"id": "references"},
"resource_type": {"id": "publication-article"},
},
}

if recid:
Expand Down
Loading
Loading