diff --git a/docs/.DS_Store b/docs/.DS_Store new file mode 100644 index 0000000..5d3ce11 Binary files /dev/null and b/docs/.DS_Store differ diff --git a/docs/schema/include_access_model.yaml b/docs/schema/include_access_model.yaml index 36f3650..755ba0e 100644 --- a/docs/schema/include_access_model.yaml +++ b/docs/schema/include_access_model.yaml @@ -814,6 +814,129 @@ enums: sha1: text: sha1 title: SHA-1 + EnumAccessType: + name: EnumAccessType + definition_uri: https://includedcc.org/include-access-model/EnumAccessType + description: Types of file access levels. + from_schema: https://includedcc.org/include-access-model + permissible_values: + open: + text: open + title: Open Access + controlled: + text: controlled + title: Controlled Access + registered: + text: registered + title: Registered-tier Access + EnumExperimentalStrategy: + name: EnumExperimentalStrategy + definition_uri: https://includedcc.org/include-access-model/EnumExperimentalStrategy + description: Types of sequencing methods. 
+ from_schema: https://includedcc.org/include-access-model + permissible_values: + wgs: + text: wgs + title: Whole Genome Sequencing + rnaseq: + text: rnaseq + title: RNA-Seq + wxs: + text: wxs + title: Whole Exome Sequencing + methlyation: + text: methlyation + title: Methylation + clr_wgs: + text: clr_wgs + title: Continuous Long Reads WGS + proteomics: + text: proteomics + title: Proteomics + targeted_seq: + text: targeted_seq + title: Targeted Sequencing + ccs_wgs: + text: ccs_wgs + title: Circular Consensus Sequencing WGS + panel: + text: panel + title: Panel + ccs_rnaseq: + text: ccs_rnaseq + title: Circular Consensus Sequencing RNA-Seq + ont_wgs: + text: ont_wgs + title: Oxford Nanopore Technologies WGS + clr_rnaseq: + text: clr_rnaseq + title: Continuous Long Reads RNA-Seq + EnumAssayCenter: + name: EnumAssayCenter + definition_uri: https://includedcc.org/include-access-model/EnumAssayCenter + description: Organizations or centers producing raw or harmonized sequencing files. + from_schema: https://includedcc.org/include-access-model + permissible_values: + broad: + text: broad + title: The Broad Institute + hudsonalpha: + text: hudsonalpha + title: HudsonAlpha Institute for Biotechnology + stjude: + text: stjude + title: St. 
Jude + baylor: + text: baylor + title: Baylor College of Medicine + chop: + text: chop + title: The Children's Hospital of Philadelphia + other: + text: other + title: Other + unknown: + text: unknown + title: Unknown + EnumRepository: + name: EnumRepository + definition_uri: https://includedcc.org/include-access-model/EnumRepository + description: specific drs service used for registration + from_schema: https://includedcc.org/include-access-model + permissible_values: + cavatica: + text: cavatica + title: Cavatica DRS + dcf: + text: dcf + title: NCI DCF + other: + text: other + title: Other + EnumPlatform: + name: EnumPlatform + definition_uri: https://includedcc.org/include-access-model/EnumPlatform + description: names of instrument or platforms used for assay data generation + from_schema: https://includedcc.org/include-access-model + permissible_values: + illumina: + text: illumina + title: Illumina + pacbio: + text: pacbio + title: PacBio + ont: + text: ont + title: ONT + illumina_epic: + text: illumina_epic + title: Illumina Infinium HumanMethylationEPICv2 + other: + text: other + title: Other + unknown: + text: unknown + title: Unknown slots: study_id: name: study_id @@ -822,10 +945,12 @@ slots: title: Study ID from_schema: https://includedcc.org/include-access-model slot_uri: includedcc:study_id - owner: StudyMetadata + owner: FileAdmin domain_of: - Study - StudyMetadata + - File + - FileAdmin range: Study multivalued: false do_id: @@ -849,13 +974,15 @@ slots: title: Study ID from_schema: https://includedcc.org/include-access-model slot_uri: includedcc:subject_id - owner: File + owner: FileAssay domain_of: - Subject - Demographics - SubjectAssertion - Encounter - File + - FileAdmin + - FileAssay range: Subject multivalued: false assertion_id: @@ -1153,10 +1280,11 @@ slots: title: Data Category from_schema: https://includedcc.org/include-access-model slot_uri: includedcc:data_category - owner: File + owner: FileAssay domain_of: - StudyMetadata - File + - 
FileAssay range: EnumDataCategory clinical_data_source_type: name: clinical_data_source_type @@ -1525,11 +1653,13 @@ slots: title: Sample ID from_schema: https://includedcc.org/include-access-model slot_uri: includedcc:sample_id - owner: File + owner: FileAssay domain_of: - Sample - Aliquot - File + - FileAdmin + - FileAssay range: Sample parent_sample_id: name: parent_sample_id @@ -1777,10 +1907,12 @@ slots: title: File ID from_schema: https://includedcc.org/include-access-model slot_uri: includedcc:file_id - owner: Dataset + owner: FileAssay domain_of: - File - Dataset + - FileAdmin + - FileAssay range: File filename: name: filename @@ -1793,6 +1925,7 @@ slots: domain_of: - File range: string + required: true format: name: format definition_uri: https://includedcc.org/include-access-model/format @@ -1800,10 +1933,12 @@ slots: title: File Format from_schema: https://includedcc.org/include-access-model slot_uri: includedcc:format - owner: File + owner: FileAssay domain_of: - File + - FileAssay range: EnumEDAMFormats + required: true data_type: name: data_type definition_uri: https://includedcc.org/include-access-model/data_type @@ -1811,10 +1946,12 @@ slots: title: Data Type from_schema: https://includedcc.org/include-access-model slot_uri: includedcc:data_type - owner: File + owner: FileAssay domain_of: - File + - FileAssay range: EnumEDAMDataTypes + required: true size: name: size definition_uri: https://includedcc.org/include-access-model/size @@ -1822,10 +1959,13 @@ slots: title: File Size from_schema: https://includedcc.org/include-access-model slot_uri: includedcc:size - owner: File + owner: FileAssay domain_of: - File + - FileAdmin + - FileAssay range: integer + required: true unit: ucum_code: By staging_url: @@ -1835,10 +1975,12 @@ slots: title: Staging Location from_schema: https://includedcc.org/include-access-model slot_uri: includedcc:staging_url - owner: File + owner: FileAdmin domain_of: - File + - FileAdmin range: uriorcurie + required: false 
release_url: name: release_url definition_uri: https://includedcc.org/include-access-model/release_url @@ -1846,10 +1988,12 @@ slots: title: Release Location from_schema: https://includedcc.org/include-access-model slot_uri: includedcc:release_url - owner: File + owner: FileAdmin domain_of: - File + - FileAdmin range: uriorcurie + required: false drs_uri: name: drs_uri definition_uri: https://includedcc.org/include-access-model/drs_uri @@ -1857,10 +2001,12 @@ slots: title: DRS URI from_schema: https://includedcc.org/include-access-model slot_uri: includedcc:drs_uri - owner: File + owner: FileAdmin domain_of: - File + - FileAdmin range: uriorcurie + required: false hash: name: hash definition_uri: https://includedcc.org/include-access-model/hash @@ -1868,10 +2014,12 @@ slots: title: File Hash from_schema: https://includedcc.org/include-access-model slot_uri: includedcc:hash - owner: File + owner: FileAdmin domain_of: - File + - FileAdmin range: FileHash + required: true inlined: true inlined_as_list: true hash_type: @@ -1885,6 +2033,7 @@ slots: domain_of: - FileHash range: EnumFileHashType + required: true hash_value: name: hash_value definition_uri: https://includedcc.org/include-access-model/hash_value @@ -1896,6 +2045,7 @@ slots: domain_of: - FileHash range: string + required: true dataset_id: name: dataset_id definition_uri: https://includedcc.org/include-access-model/dataset_id @@ -1929,6 +2079,465 @@ slots: domain_of: - Dataset range: string + s3_file_path: + name: s3_file_path + definition_uri: https://includedcc.org/include-access-model/s3_file_path + description: The full s3 url of a file's location in aws + title: S3 File Path + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:s3_file_path + owner: FileAdmin + domain_of: + - File + - FileAdmin + range: string + required: true + is_released: + name: is_released + definition_uri: https://includedcc.org/include-access-model/is_released + description: A flag that notes whether a 
file has been released to the public + title: Is Released + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:is_released + owner: FileAdmin + domain_of: + - FileAdmin + range: boolean + required: true + is_registered: + name: is_registered + definition_uri: https://includedcc.org/include-access-model/is_registered + description: A flag that notes whether a file has been registered to a drs service + title: Is Registered + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:is_registered + owner: FileAdmin + domain_of: + - FileAdmin + range: boolean + required: true + repository: + name: repository + definition_uri: https://includedcc.org/include-access-model/repository + description: The name of the drs service which files are registered to + title: Repository + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:repository + owner: FileAdmin + domain_of: + - FileAdmin + range: EnumRepository + required: false + file_category: + name: file_category + definition_uri: https://includedcc.org/include-access-model/file_category + description: A high level classification of the file used for operations. + title: File Category + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:file_category + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + s3_key: + name: s3_key + definition_uri: https://includedcc.org/include-access-model/s3_key + description: The unique identifier for an object within a bucket + title: S3 Key + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:s3_key + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + file_extension: + name: file_extension + definition_uri: https://includedcc.org/include-access-model/file_extension + description: A 3-4 letter code at the end of a filename that identifies the file + format. 
+ title: File Extension + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:file_extension + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + data_transfer_id: + name: data_transfer_id + definition_uri: https://includedcc.org/include-access-model/data_transfer_id + description: A jira ticket number associated with a file transfer request to production + bucket + title: Data Transfer ID + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:data_transfer_id + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: false + aws_account_id: + name: aws_account_id + definition_uri: https://includedcc.org/include-access-model/aws_account_id + description: A 12-digit number that uniquely identifies a specific AWS account + title: AWS Account ID + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:aws_account_id + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + account_name: + name: account_name + definition_uri: https://includedcc.org/include-access-model/account_name + description: A user-defined label used to define an AWS account. 
+ title: AWS Account Name + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:account_name + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + account_alias: + name: account_alias + definition_uri: https://includedcc.org/include-access-model/account_alias + description: A unique user-defined string that replaces the AWS Account ID in + the IAM user sign-in URL + title: Account Alias + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:account_alias + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + bucket_study_id: + name: bucket_study_id + definition_uri: https://includedcc.org/include-access-model/bucket_study_id + description: The global study ID used to create the bucket + title: Bucket Study ID + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:bucket_study_id + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + bucket: + name: bucket + definition_uri: https://includedcc.org/include-access-model/bucket + description: Cloud storage container in AWS used to manage and store s3 objects + title: Bucket + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:bucket + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + s3_created_at: + name: s3_created_at + definition_uri: https://includedcc.org/include-access-model/s3_created_at + description: Timestamp of when a file was uploaded to an s3 bucket. + title: S3 Created At + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:s3_created_at + owner: FileAdmin + domain_of: + - FileAdmin + range: datetime + required: true + s3_modified_at: + name: s3_modified_at + definition_uri: https://includedcc.org/include-access-model/s3_modified_at + description: Timestamp of when a file was modified in an s3 bucket. 
+ title: S3 Modified At + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:s3_modified_at + owner: FileAdmin + domain_of: + - FileAdmin + range: datetime + required: true + intelligent_tiering_access_tier: + name: intelligent_tiering_access_tier + definition_uri: https://includedcc.org/include-access-model/intelligent_tiering_access_tier + description: Storage access tier assigned by AWS intelligent tiering, indicating + the current access frequency classification of the object + title: Intelligent Tiering Access Tier + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:intelligent_tiering_access_tier + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + is_delete_marker: + name: is_delete_marker + definition_uri: https://includedcc.org/include-access-model/is_delete_marker + description: A flag that notes whether a file has been deleted from s3 + title: Is Delete Marker + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:is_delete_marker + owner: FileAdmin + domain_of: + - FileAdmin + range: boolean + required: true + is_latest: + name: is_latest + definition_uri: https://includedcc.org/include-access-model/is_latest + description: Specifies whether an object version is the most recent version of + that object + title: Is Latest + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:is_latest + owner: FileAdmin + domain_of: + - FileAdmin + range: boolean + required: true + storage_class: + name: storage_class + definition_uri: https://includedcc.org/include-access-model/storage_class + description: Storage tier of the object in AWS reflecting cost and access characteristics. 
+ title: Storage Class + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:storage_class + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + manifest_hash_value: + name: manifest_hash_value + definition_uri: https://includedcc.org/include-access-model/manifest_hash_value + description: The provided hash value from external users to be validated against + internal hash values + title: Manifest Hash Value + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:manifest_hash_value + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: false + file_hash_validation_status: + name: file_hash_validation_status + definition_uri: https://includedcc.org/include-access-model/file_hash_validation_status + description: Notes whether hashes have been generated and verified against manifest + hash values. + title: File Hash Validation Status + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:file_hash_validation_status + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: false + file_type: + name: file_type + definition_uri: https://includedcc.org/include-access-model/file_type + description: An internal type or classification of the files based on its operational + usage. + title: File Type + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:file_type + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + encryption_status: + name: encryption_status + definition_uri: https://includedcc.org/include-access-model/encryption_status + description: Indicates whether the object in AWS is encrypted and the type of + encryption applied. 
+ title: Encryption Status + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:encryption_status + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + is_multipart_uploaded: + name: is_multipart_uploaded + definition_uri: https://includedcc.org/include-access-model/is_multipart_uploaded + description: Indicates whether the object was uploaded using a multipart upload + process. + title: Is Multipart Uploaded + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:is_multipart_uploaded + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + object_lock_level_hold_status: + name: object_lock_level_hold_status + definition_uri: https://includedcc.org/include-access-model/object_lock_level_hold_status + description: Whether a legal hold is applied to prevent deletion of the object. + title: Object Lock Level Hold Status + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:object_lock_level_hold_status + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + object_lock_mode: + name: object_lock_mode + definition_uri: https://includedcc.org/include-access-model/object_lock_mode + description: Retention mode applied to the object that restricts deletion or modification. + title: Object Lock Mode + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:object_lock_mode + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + replication_status: + name: replication_status + definition_uri: https://includedcc.org/include-access-model/replication_status + description: Status of the object's replication to another storage location. 
+ title: Replication Status + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:replication_status + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + version_id: + name: version_id + definition_uri: https://includedcc.org/include-access-model/version_id + description: Identifier for a specific version of the object + title: Version ID + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:version_id + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + access_type: + name: access_type + definition_uri: https://includedcc.org/include-access-model/access_type + description: Notes whether a file is controlled, open, or registered-tier access + title: Access Type + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:access_type + owner: FileAssay + domain_of: + - FileAdmin + - FileAssay + range: EnumAccessType + required: true + access_url: + name: access_url + definition_uri: https://includedcc.org/include-access-model/access_url + description: HTTPS endpoint for accessing a file via a specific data repository + service. + title: Access URL + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:access_url + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: false + acl: + name: acl + definition_uri: https://includedcc.org/include-access-model/acl + description: The object access control list. 
+ title: ACL + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:acl + owner: FileAdmin + domain_of: + - FileAdmin + range: string + required: true + experimental_strategy: + name: experimental_strategy + definition_uri: https://includedcc.org/include-access-model/experimental_strategy + description: Method or assay used to generate the data + title: Experimental Strategy + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:experimental_strategy + owner: FileAssay + domain_of: + - FileAdmin + - FileAssay + range: EnumExperimentalStrategy + required: true + assay_center: + name: assay_center + definition_uri: https://includedcc.org/include-access-model/assay_center + description: The organization or center that generated the file + title: Assay Center + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:assay_center + owner: FileAssay + domain_of: + - FileAssay + range: EnumAssayCenter + required: false + platform: + name: platform + definition_uri: https://includedcc.org/include-access-model/platform + description: Instrument or platform family name + title: Platform + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:platform + owner: FileAssay + domain_of: + - FileAssay + range: EnumPlatform + required: true + workflow_name: + name: workflow_name + definition_uri: https://includedcc.org/include-access-model/workflow_name + description: Processing tool that produced the file + title: Workflow Name + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:workflow_name + owner: FileAssay + domain_of: + - FileAssay + range: string + required: false + workflow_version: + name: workflow_version + definition_uri: https://includedcc.org/include-access-model/workflow_version + description: Version of the process tool that produced the file + title: Workflow Version + from_schema: https://includedcc.org/include-access-model + 
slot_uri: includedcc:workflow_version + owner: FileAssay + domain_of: + - FileAssay + range: string + required: false + object_lock_retain_until_date: + name: object_lock_retain_until_date + definition_uri: https://includedcc.org/include-access-model/object_lock_retain_until_date + description: Specifies exact date and time when an object's Object Lock retention + period expires. + title: Object Lock Retain Until Date + from_schema: https://includedcc.org/include-access-model + slot_uri: includedcc:object_lock_retain_until_date + owner: FileAdmin + domain_of: + - FileAdmin + range: datetime + required: true Study_study_id: name: Study_study_id definition_uri: https://includedcc.org/include-access-model/study_id @@ -2221,6 +2830,24 @@ slots: usage_slot_name: activity_definition_id range: string required: true + File_study_id: + name: File_study_id + definition_uri: https://includedcc.org/include-access-model/study_id + description: INCLUDE Global ID for the study + title: Study ID + from_schema: https://includedcc.org/include-access-model + is_a: study_id + domain: File + slot_uri: includedcc:study_id + alias: study_id + owner: File + domain_of: + - File + is_usage_slot: true + usage_slot_name: study_id + range: Study + required: true + multivalued: false File_file_id: name: File_file_id definition_uri: https://includedcc.org/include-access-model/file_id @@ -2255,6 +2882,7 @@ slots: is_usage_slot: true usage_slot_name: subject_id range: Subject + required: true multivalued: true File_sample_id: name: File_sample_id @@ -2272,7 +2900,26 @@ slots: is_usage_slot: true usage_slot_name: sample_id range: Sample + required: true multivalued: true + File_data_category: + name: File_data_category + definition_uri: https://includedcc.org/include-access-model/data_category + description: General category of data in this Record (e.g. 
Clinical, Genomics, + etc) + title: Data Category + from_schema: https://includedcc.org/include-access-model + is_a: data_category + domain: File + slot_uri: includedcc:data_category + alias: data_category + owner: File + domain_of: + - File + is_usage_slot: true + usage_slot_name: data_category + range: EnumDataCategory + required: true Dataset_dataset_id: name: Dataset_dataset_id definition_uri: https://includedcc.org/include-access-model/dataset_id @@ -2308,6 +2955,114 @@ slots: usage_slot_name: file_id range: File multivalued: true + FileAdmin_study_id: + name: FileAdmin_study_id + definition_uri: https://includedcc.org/include-access-model/study_id + description: INCLUDE Global ID for the study + title: Study ID + from_schema: https://includedcc.org/include-access-model + is_a: study_id + domain: FileAdmin + slot_uri: includedcc:study_id + alias: study_id + owner: FileAdmin + domain_of: + - FileAdmin + is_usage_slot: true + usage_slot_name: study_id + range: Study + required: true + multivalued: false + FileAdmin_file_id: + name: FileAdmin_file_id + definition_uri: https://includedcc.org/include-access-model/file_id + description: Unique identifier for this File. + title: File ID + from_schema: https://includedcc.org/include-access-model + is_a: file_id + domain: FileAdmin + slot_uri: includedcc:file_id + identifier: true + alias: file_id + owner: FileAdmin + domain_of: + - FileAdmin + is_usage_slot: true + usage_slot_name: file_id + range: File + required: true + FileAssay_file_id: + name: FileAssay_file_id + definition_uri: https://includedcc.org/include-access-model/file_id + description: Unique identifier for this File. 
+ title: File ID + from_schema: https://includedcc.org/include-access-model + is_a: file_id + domain: FileAssay + slot_uri: includedcc:file_id + identifier: true + alias: file_id + owner: FileAssay + domain_of: + - FileAssay + is_usage_slot: true + usage_slot_name: file_id + range: File + required: true + FileAssay_subject_id: + name: FileAssay_subject_id + definition_uri: https://includedcc.org/include-access-model/subject_id + description: INCLUDE Global ID for the Subject + title: Study ID + from_schema: https://includedcc.org/include-access-model + is_a: subject_id + domain: FileAssay + slot_uri: includedcc:subject_id + alias: subject_id + owner: FileAssay + domain_of: + - FileAssay + is_usage_slot: true + usage_slot_name: subject_id + range: Subject + required: true + multivalued: true + FileAssay_sample_id: + name: FileAssay_sample_id + definition_uri: https://includedcc.org/include-access-model/sample_id + description: The unique identifier for this Sample. + title: Sample ID + from_schema: https://includedcc.org/include-access-model + is_a: sample_id + domain: FileAssay + slot_uri: includedcc:sample_id + alias: sample_id + owner: FileAssay + domain_of: + - FileAssay + is_usage_slot: true + usage_slot_name: sample_id + range: Sample + required: true + multivalued: true + FileAssay_data_category: + name: FileAssay_data_category + definition_uri: https://includedcc.org/include-access-model/data_category + description: General category of data in this Record (e.g. 
Clinical, Genomics, + etc) + title: Data Category + from_schema: https://includedcc.org/include-access-model + is_a: data_category + domain: FileAssay + slot_uri: includedcc:data_category + alias: data_category + owner: FileAssay + domain_of: + - FileAssay + is_usage_slot: true + usage_slot_name: data_category + range: EnumDataCategory + required: true classes: Record: name: Record @@ -2674,18 +3429,21 @@ classes: File: name: File definition_uri: https://includedcc.org/include-access-model/File - description: File + description: Required information for portal use. title: File from_schema: https://includedcc.org/include-access-model is_a: Record slots: - external_id + - File_study_id - File_file_id - File_subject_id - File_sample_id + - s3_file_path - filename + - size - format - - data_category + - File_data_category - data_type - format - size @@ -2694,6 +3452,9 @@ classes: - drs_uri - hash slot_usage: + study_id: + name: study_id + required: true file_id: name: file_id identifier: true @@ -2701,10 +3462,15 @@ classes: required: true subject_id: name: subject_id + required: true multivalued: true sample_id: name: sample_id + required: true multivalued: true + data_category: + name: data_category + required: true class_uri: includedcc:File FileHash: name: FileHash @@ -2742,9 +3508,106 @@ classes: description: The list of files comprising this dataset. 
multivalued: true class_uri: includedcc:Dataset + FileAdmin: + name: FileAdmin + definition_uri: https://includedcc.org/include-access-model/FileAdmin + description: File universe; contains all information about a file that may be + needed for operational work + from_schema: https://includedcc.org/include-access-model + slots: + - FileAdmin_study_id + - FileAdmin_file_id + - subject_id + - sample_id + - s3_file_path + - file_category + - size + - s3_key + - file_extension + - data_transfer_id + - aws_account_id + - account_name + - account_alias + - bucket_study_id + - bucket + - s3_created_at + - s3_modified_at + - intelligent_tiering_access_tier + - is_delete_marker + - is_latest + - storage_class + - manifest_hash_value + - file_hash_validation_status + - file_type + - encryption_status + - is_multipart_uploaded + - object_lock_level_hold_status + - object_lock_mode + - object_lock_retain_until_date + - replication_status + - version_id + - staging_url + - release_url + - hash + - access_type + - access_url + - drs_uri + - acl + - is_released + - is_registered + - repository + - experimental_strategy + slot_usage: + study_id: + name: study_id + required: true + file_id: + name: file_id + identifier: true + required: true + class_uri: includedcc:FileAdmin + FileAssay: + name: FileAssay + definition_uri: https://includedcc.org/include-access-model/FileAssay + description: A file produced by or associated with an assay or data acquisition + process including omics, imaging, actigraphy, and other experimental or observational + data. 
+ title: File Assay + from_schema: https://includedcc.org/include-access-model + slots: + - FileAssay_file_id + - FileAssay_subject_id + - FileAssay_sample_id + - FileAssay_data_category + - experimental_strategy + - data_type + - format + - size + - access_type + - assay_center + - platform + - workflow_name + - workflow_version + slot_usage: + file_id: + name: file_id + identifier: true + required: true + subject_id: + name: subject_id + required: true + multivalued: true + sample_id: + name: sample_id + required: true + multivalued: true + data_category: + name: data_category + required: true + class_uri: includedcc:FileAssay metamodel_version: 1.7.0 source_file: include_access_model.yaml -source_file_date: '2026-03-12T11:06:57' -source_file_size: 36149 -generation_date: '2026-03-12T11:07:27' +source_file_date: '2026-04-03T17:19:20' +source_file_size: 46203 +generation_date: '2026-04-03T17:21:01' diff --git a/src/include_access_model/datamodel/include_access_model.py b/src/include_access_model/datamodel/include_access_model.py index a1be426..8e0b499 100644 --- a/src/include_access_model/datamodel/include_access_model.py +++ b/src/include_access_model/datamodel/include_access_model.py @@ -1,5 +1,5 @@ # Auto generated from include_access_model.yaml by pythongen.py version: 0.0.1 -# Generation date: 2026-03-12T11:07:16 +# Generation date: 2026-04-03T17:20:29 # Schema: include-access-model # # id: https://includedcc.org/include-access-model @@ -56,8 +56,8 @@ URIRef ) -from linkml_runtime.linkml_model.types import Float, Integer, String, Uri, Uriorcurie -from linkml_runtime.utils.metamodelcore import URI, URIorCURIE +from linkml_runtime.linkml_model.types import Boolean, Datetime, Float, Integer, String, Uri, Uriorcurie +from linkml_runtime.utils.metamodelcore import Bool, URI, URIorCURIE, XSDDateTime metamodel_version = "1.7.0" version = None @@ -145,6 +145,14 @@ class DatasetDatasetId(extended_str): pass +class FileAdminFileId(FileFileId): + pass + + +class 
FileAssayFileId(FileFileId): + pass + + @dataclass(repr=False) class Record(YAMLRoot): """ @@ -913,7 +921,7 @@ def __post_init__(self, *_: str, **kwargs: Any): @dataclass(repr=False) class File(Record): """ - File + Required information for portal use. """ _inherited_slots: ClassVar[list[str]] = [] @@ -923,17 +931,19 @@ class File(Record): class_model_uri: ClassVar[URIRef] = INCLUDEDCC.File file_id: Union[str, FileFileId] = None - subject_id: Optional[Union[Union[str, SubjectSubjectId], list[Union[str, SubjectSubjectId]]]] = empty_list() - sample_id: Optional[Union[Union[str, SampleSampleId], list[Union[str, SampleSampleId]]]] = empty_list() - filename: Optional[str] = None - format: Optional[Union[str, "EnumEDAMFormats"]] = None - data_category: Optional[Union[str, "EnumDataCategory"]] = None - data_type: Optional[Union[str, "EnumEDAMDataTypes"]] = None - size: Optional[int] = None + study_id: Union[str, StudyStudyId] = None + subject_id: Union[Union[str, SubjectSubjectId], list[Union[str, SubjectSubjectId]]] = None + sample_id: Union[Union[str, SampleSampleId], list[Union[str, SampleSampleId]]] = None + s3_file_path: str = None + filename: str = None + size: int = None + format: Union[str, "EnumEDAMFormats"] = None + data_category: Union[str, "EnumDataCategory"] = None + data_type: Union[str, "EnumEDAMDataTypes"] = None + hash: Union[dict, "FileHash"] = None staging_url: Optional[Union[str, URIorCURIE]] = None release_url: Optional[Union[str, URIorCURIE]] = None drs_uri: Optional[Union[str, URIorCURIE]] = None - hash: Optional[Union[dict, "FileHash"]] = None def __post_init__(self, *_: str, **kwargs: Any): if self._is_empty(self.file_id): @@ -941,23 +951,53 @@ def __post_init__(self, *_: str, **kwargs: Any): if not isinstance(self.file_id, FileFileId): self.file_id = FileFileId(self.file_id) + if self._is_empty(self.study_id): + self.MissingRequiredField("study_id") + if not isinstance(self.study_id, StudyStudyId): + self.study_id = StudyStudyId(self.study_id) + 
+ if self._is_empty(self.subject_id): + self.MissingRequiredField("subject_id") if not isinstance(self.subject_id, list): self.subject_id = [self.subject_id] if self.subject_id is not None else [] self.subject_id = [v if isinstance(v, SubjectSubjectId) else SubjectSubjectId(v) for v in self.subject_id] + if self._is_empty(self.sample_id): + self.MissingRequiredField("sample_id") if not isinstance(self.sample_id, list): self.sample_id = [self.sample_id] if self.sample_id is not None else [] self.sample_id = [v if isinstance(v, SampleSampleId) else SampleSampleId(v) for v in self.sample_id] - if self.filename is not None and not isinstance(self.filename, str): + if self._is_empty(self.s3_file_path): + self.MissingRequiredField("s3_file_path") + if not isinstance(self.s3_file_path, str): + self.s3_file_path = str(self.s3_file_path) + + if self._is_empty(self.filename): + self.MissingRequiredField("filename") + if not isinstance(self.filename, str): self.filename = str(self.filename) - if self.data_category is not None and not isinstance(self.data_category, EnumDataCategory): + if self._is_empty(self.size): + self.MissingRequiredField("size") + if not isinstance(self.size, int): + self.size = int(self.size) + + if self._is_empty(self.data_category): + self.MissingRequiredField("data_category") + if not isinstance(self.data_category, EnumDataCategory): self.data_category = EnumDataCategory(self.data_category) - if self.size is not None and not isinstance(self.size, int): + if self._is_empty(self.size): + self.MissingRequiredField("size") + if not isinstance(self.size, int): self.size = int(self.size) + if self._is_empty(self.hash): + self.MissingRequiredField("hash") + if not isinstance(self.hash, FileHash): + self.hash = FileHash(**as_dict(self.hash)) + if self.staging_url is not None and not isinstance(self.staging_url, URIorCURIE): self.staging_url = URIorCURIE(self.staging_url) @@ -967,9 +1007,6 @@ def __post_init__(self, *_: str, **kwargs: Any): if self.drs_uri is 
not None and not isinstance(self.drs_uri, URIorCURIE): self.drs_uri = URIorCURIE(self.drs_uri) - if self.hash is not None and not isinstance(self.hash, FileHash): - self.hash = FileHash(**as_dict(self.hash)) - super().__post_init__(**kwargs) @@ -985,14 +1022,18 @@ class FileHash(YAMLRoot): class_name: ClassVar[str] = "FileHash" class_model_uri: ClassVar[URIRef] = INCLUDEDCC.FileHash - hash_type: Optional[Union[str, "EnumFileHashType"]] = None - hash_value: Optional[str] = None + hash_type: Union[str, "EnumFileHashType"] = None + hash_value: str = None def __post_init__(self, *_: str, **kwargs: Any): - if self.hash_type is not None and not isinstance(self.hash_type, EnumFileHashType): + if self._is_empty(self.hash_type): + self.MissingRequiredField("hash_type") + if not isinstance(self.hash_type, EnumFileHashType): self.hash_type = EnumFileHashType(self.hash_type) - if self.hash_value is not None and not isinstance(self.hash_value, str): + if self._is_empty(self.hash_value): + self.MissingRequiredField("hash_value") + if not isinstance(self.hash_value, str): self.hash_value = str(self.hash_value) super().__post_init__(**kwargs) @@ -1051,6 +1092,337 @@ def __post_init__(self, *_: str, **kwargs: Any): super().__post_init__(**kwargs) +@dataclass(repr=False) +class FileAdmin(YAMLRoot): + """ + File unvierse; contains all information about a file that may be needed for operational work + """ + _inherited_slots: ClassVar[list[str]] = [] + + class_class_uri: ClassVar[URIRef] = INCLUDEDCC["FileAdmin"] + class_class_curie: ClassVar[str] = "includedcc:FileAdmin" + class_name: ClassVar[str] = "FileAdmin" + class_model_uri: ClassVar[URIRef] = INCLUDEDCC.FileAdmin + + file_id: Union[str, FileAdminFileId] = None + study_id: Union[str, StudyStudyId] = None + s3_file_path: str = None + file_category: str = None + size: int = None + s3_key: str = None + file_extension: str = None + aws_account_id: str = None + account_name: str = None + account_alias: str = None + bucket_study_id: 
str = None + bucket: str = None + s3_created_at: Union[str, XSDDateTime] = None + s3_modified_at: Union[str, XSDDateTime] = None + intelligent_tiering_access_tier: str = None + is_delete_marker: Union[bool, Bool] = None + is_latest: Union[bool, Bool] = None + storage_class: str = None + file_type: str = None + encryption_status: str = None + is_multipart_uploaded: str = None + object_lock_level_hold_status: str = None + object_lock_mode: str = None + object_lock_retain_until_date: Union[str, XSDDateTime] = None + replication_status: str = None + version_id: str = None + hash: Union[dict, FileHash] = None + access_type: Union[str, "EnumAccessType"] = None + acl: str = None + is_released: Union[bool, Bool] = None + is_registered: Union[bool, Bool] = None + experimental_strategy: Union[str, "EnumExperimentalStrategy"] = None + subject_id: Optional[Union[str, SubjectSubjectId]] = None + sample_id: Optional[Union[str, SampleSampleId]] = None + data_transfer_id: Optional[str] = None + manifest_hash_value: Optional[str] = None + file_hash_validation_status: Optional[str] = None + staging_url: Optional[Union[str, URIorCURIE]] = None + release_url: Optional[Union[str, URIorCURIE]] = None + access_url: Optional[str] = None + drs_uri: Optional[Union[str, URIorCURIE]] = None + repository: Optional[Union[str, "EnumRepository"]] = None + + def __post_init__(self, *_: str, **kwargs: Any): + if self._is_empty(self.file_id): + self.MissingRequiredField("file_id") + if not isinstance(self.file_id, FileAdminFileId): + self.file_id = FileAdminFileId(self.file_id) + + if self._is_empty(self.study_id): + self.MissingRequiredField("study_id") + if not isinstance(self.study_id, StudyStudyId): + self.study_id = StudyStudyId(self.study_id) + + if self._is_empty(self.s3_file_path): + self.MissingRequiredField("s3_file_path") + if not isinstance(self.s3_file_path, str): + self.s3_file_path = str(self.s3_file_path) + + if self._is_empty(self.file_category): + 
self.MissingRequiredField("file_category") + if not isinstance(self.file_category, str): + self.file_category = str(self.file_category) + + if self._is_empty(self.size): + self.MissingRequiredField("size") + if not isinstance(self.size, int): + self.size = int(self.size) + + if self._is_empty(self.s3_key): + self.MissingRequiredField("s3_key") + if not isinstance(self.s3_key, str): + self.s3_key = str(self.s3_key) + + if self._is_empty(self.file_extension): + self.MissingRequiredField("file_extension") + if not isinstance(self.file_extension, str): + self.file_extension = str(self.file_extension) + + if self._is_empty(self.aws_account_id): + self.MissingRequiredField("aws_account_id") + if not isinstance(self.aws_account_id, str): + self.aws_account_id = str(self.aws_account_id) + + if self._is_empty(self.account_name): + self.MissingRequiredField("account_name") + if not isinstance(self.account_name, str): + self.account_name = str(self.account_name) + + if self._is_empty(self.account_alias): + self.MissingRequiredField("account_alias") + if not isinstance(self.account_alias, str): + self.account_alias = str(self.account_alias) + + if self._is_empty(self.bucket_study_id): + self.MissingRequiredField("bucket_study_id") + if not isinstance(self.bucket_study_id, str): + self.bucket_study_id = str(self.bucket_study_id) + + if self._is_empty(self.bucket): + self.MissingRequiredField("bucket") + if not isinstance(self.bucket, str): + self.bucket = str(self.bucket) + + if self._is_empty(self.s3_created_at): + self.MissingRequiredField("s3_created_at") + if not isinstance(self.s3_created_at, XSDDateTime): + self.s3_created_at = XSDDateTime(self.s3_created_at) + + if self._is_empty(self.s3_modified_at): + self.MissingRequiredField("s3_modified_at") + if not isinstance(self.s3_modified_at, XSDDateTime): + self.s3_modified_at = XSDDateTime(self.s3_modified_at) + + if self._is_empty(self.intelligent_tiering_access_tier): + 
self.MissingRequiredField("intelligent_tiering_access_tier") + if not isinstance(self.intelligent_tiering_access_tier, str): + self.intelligent_tiering_access_tier = str(self.intelligent_tiering_access_tier) + + if self._is_empty(self.is_delete_marker): + self.MissingRequiredField("is_delete_marker") + if not isinstance(self.is_delete_marker, Bool): + self.is_delete_marker = Bool(self.is_delete_marker) + + if self._is_empty(self.is_latest): + self.MissingRequiredField("is_latest") + if not isinstance(self.is_latest, Bool): + self.is_latest = Bool(self.is_latest) + + if self._is_empty(self.storage_class): + self.MissingRequiredField("storage_class") + if not isinstance(self.storage_class, str): + self.storage_class = str(self.storage_class) + + if self._is_empty(self.file_type): + self.MissingRequiredField("file_type") + if not isinstance(self.file_type, str): + self.file_type = str(self.file_type) + + if self._is_empty(self.encryption_status): + self.MissingRequiredField("encryption_status") + if not isinstance(self.encryption_status, str): + self.encryption_status = str(self.encryption_status) + + if self._is_empty(self.is_multipart_uploaded): + self.MissingRequiredField("is_multipart_uploaded") + if not isinstance(self.is_multipart_uploaded, str): + self.is_multipart_uploaded = str(self.is_multipart_uploaded) + + if self._is_empty(self.object_lock_level_hold_status): + self.MissingRequiredField("object_lock_level_hold_status") + if not isinstance(self.object_lock_level_hold_status, str): + self.object_lock_level_hold_status = str(self.object_lock_level_hold_status) + + if self._is_empty(self.object_lock_mode): + self.MissingRequiredField("object_lock_mode") + if not isinstance(self.object_lock_mode, str): + self.object_lock_mode = str(self.object_lock_mode) + + if self._is_empty(self.object_lock_retain_until_date): + self.MissingRequiredField("object_lock_retain_until_date") + if not isinstance(self.object_lock_retain_until_date, XSDDateTime): + 
self.object_lock_retain_until_date = XSDDateTime(self.object_lock_retain_until_date) + + if self._is_empty(self.replication_status): + self.MissingRequiredField("replication_status") + if not isinstance(self.replication_status, str): + self.replication_status = str(self.replication_status) + + if self._is_empty(self.version_id): + self.MissingRequiredField("version_id") + if not isinstance(self.version_id, str): + self.version_id = str(self.version_id) + + if self._is_empty(self.hash): + self.MissingRequiredField("hash") + if not isinstance(self.hash, FileHash): + self.hash = FileHash(**as_dict(self.hash)) + + if self._is_empty(self.access_type): + self.MissingRequiredField("access_type") + if not isinstance(self.access_type, EnumAccessType): + self.access_type = EnumAccessType(self.access_type) + + if self._is_empty(self.acl): + self.MissingRequiredField("acl") + if not isinstance(self.acl, str): + self.acl = str(self.acl) + + if self._is_empty(self.is_released): + self.MissingRequiredField("is_released") + if not isinstance(self.is_released, Bool): + self.is_released = Bool(self.is_released) + + if self._is_empty(self.is_registered): + self.MissingRequiredField("is_registered") + if not isinstance(self.is_registered, Bool): + self.is_registered = Bool(self.is_registered) + + if self._is_empty(self.experimental_strategy): + self.MissingRequiredField("experimental_strategy") + if not isinstance(self.experimental_strategy, EnumExperimentalStrategy): + self.experimental_strategy = EnumExperimentalStrategy(self.experimental_strategy) + + if self.subject_id is not None and not isinstance(self.subject_id, SubjectSubjectId): + self.subject_id = SubjectSubjectId(self.subject_id) + + if self.sample_id is not None and not isinstance(self.sample_id, SampleSampleId): + self.sample_id = SampleSampleId(self.sample_id) + + if self.data_transfer_id is not None and not isinstance(self.data_transfer_id, str): + self.data_transfer_id = str(self.data_transfer_id) + + if 
self.manifest_hash_value is not None and not isinstance(self.manifest_hash_value, str): + self.manifest_hash_value = str(self.manifest_hash_value) + + if self.file_hash_validation_status is not None and not isinstance(self.file_hash_validation_status, str): + self.file_hash_validation_status = str(self.file_hash_validation_status) + + if self.staging_url is not None and not isinstance(self.staging_url, URIorCURIE): + self.staging_url = URIorCURIE(self.staging_url) + + if self.release_url is not None and not isinstance(self.release_url, URIorCURIE): + self.release_url = URIorCURIE(self.release_url) + + if self.access_url is not None and not isinstance(self.access_url, str): + self.access_url = str(self.access_url) + + if self.drs_uri is not None and not isinstance(self.drs_uri, URIorCURIE): + self.drs_uri = URIorCURIE(self.drs_uri) + + if self.repository is not None and not isinstance(self.repository, EnumRepository): + self.repository = EnumRepository(self.repository) + + super().__post_init__(**kwargs) + + +@dataclass(repr=False) +class FileAssay(YAMLRoot): + """ + A file produced by or associated with an assay or data acquisition process including omics, imaging, actigraphy, + and other experimental or observational data. 
+ """ + _inherited_slots: ClassVar[list[str]] = [] + + class_class_uri: ClassVar[URIRef] = INCLUDEDCC["FileAssay"] + class_class_curie: ClassVar[str] = "includedcc:FileAssay" + class_name: ClassVar[str] = "FileAssay" + class_model_uri: ClassVar[URIRef] = INCLUDEDCC.FileAssay + + file_id: Union[str, FileAssayFileId] = None + subject_id: Union[Union[str, SubjectSubjectId], list[Union[str, SubjectSubjectId]]] = None + sample_id: Union[Union[str, SampleSampleId], list[Union[str, SampleSampleId]]] = None + data_category: Union[str, "EnumDataCategory"] = None + experimental_strategy: Union[str, "EnumExperimentalStrategy"] = None + data_type: Union[str, "EnumEDAMDataTypes"] = None + format: Union[str, "EnumEDAMFormats"] = None + size: int = None + access_type: Union[str, "EnumAccessType"] = None + platform: Union[str, "EnumPlatform"] = None + assay_center: Optional[Union[str, "EnumAssayCenter"]] = None + workflow_name: Optional[str] = None + workflow_version: Optional[str] = None + + def __post_init__(self, *_: str, **kwargs: Any): + if self._is_empty(self.file_id): + self.MissingRequiredField("file_id") + if not isinstance(self.file_id, FileAssayFileId): + self.file_id = FileAssayFileId(self.file_id) + + if self._is_empty(self.subject_id): + self.MissingRequiredField("subject_id") + if not isinstance(self.subject_id, list): + self.subject_id = [self.subject_id] if self.subject_id is not None else [] + self.subject_id = [v if isinstance(v, SubjectSubjectId) else SubjectSubjectId(v) for v in self.subject_id] + + if self._is_empty(self.sample_id): + self.MissingRequiredField("sample_id") + if not isinstance(self.sample_id, list): + self.sample_id = [self.sample_id] if self.sample_id is not None else [] + self.sample_id = [v if isinstance(v, SampleSampleId) else SampleSampleId(v) for v in self.sample_id] + + if self._is_empty(self.data_category): + self.MissingRequiredField("data_category") + if not isinstance(self.data_category, EnumDataCategory): + self.data_category = 
EnumDataCategory(self.data_category) + + if self._is_empty(self.experimental_strategy): + self.MissingRequiredField("experimental_strategy") + if not isinstance(self.experimental_strategy, EnumExperimentalStrategy): + self.experimental_strategy = EnumExperimentalStrategy(self.experimental_strategy) + + if self._is_empty(self.size): + self.MissingRequiredField("size") + if not isinstance(self.size, int): + self.size = int(self.size) + + if self._is_empty(self.access_type): + self.MissingRequiredField("access_type") + if not isinstance(self.access_type, EnumAccessType): + self.access_type = EnumAccessType(self.access_type) + + if self._is_empty(self.platform): + self.MissingRequiredField("platform") + if not isinstance(self.platform, EnumPlatform): + self.platform = EnumPlatform(self.platform) + + if self.assay_center is not None and not isinstance(self.assay_center, EnumAssayCenter): + self.assay_center = EnumAssayCenter(self.assay_center) + + if self.workflow_name is not None and not isinstance(self.workflow_name, str): + self.workflow_name = str(self.workflow_name) + + if self.workflow_version is not None and not isinstance(self.workflow_version, str): + self.workflow_version = str(self.workflow_version) + + super().__post_init__(**kwargs) + + # Enumerations class EnumProgram(EnumDefinitionImpl): """ @@ -1584,6 +1956,149 @@ class EnumFileHashType(EnumDefinitionImpl): description="Types of file hashes supported.", ) +class EnumAccessType(EnumDefinitionImpl): + """ + Types of file access levels. + """ + open = PermissibleValue( + text="open", + title="Open Access") + controlled = PermissibleValue( + text="controlled", + title="Controlled Access") + registered = PermissibleValue( + text="registered", + title="Registered-tier Access") + + _defn = EnumDefinition( + name="EnumAccessType", + description="Types of file access levels.", + ) + +class EnumExperimentalStrategy(EnumDefinitionImpl): + """ + Types of sequencing methods. 
+ """ + wgs = PermissibleValue( + text="wgs", + title="Whole Genome Sequencing") + rnaseq = PermissibleValue( + text="rnaseq", + title="RNA-Seq") + wxs = PermissibleValue( + text="wxs", + title="Whole Exome Sequencing") + methlyation = PermissibleValue( + text="methlyation", + title="Methylation") + clr_wgs = PermissibleValue( + text="clr_wgs", + title="Continuous Long Reads WGS") + proteomics = PermissibleValue( + text="proteomics", + title="Proteomics") + targeted_seq = PermissibleValue( + text="targeted_seq", + title="Targeted Sequencing") + ccs_wgs = PermissibleValue( + text="ccs_wgs", + title="Circular Consensus Sequencing WGS") + panel = PermissibleValue( + text="panel", + title="Panel") + ccs_rnaseq = PermissibleValue( + text="ccs_rnaseq", + title="Circular Consensus Sequencing RNA-Se") + ont_wgs = PermissibleValue( + text="ont_wgs", + title="Oxford Nanopore Technologies WGS") + clr_rnaseq = PermissibleValue( + text="clr_rnaseq", + title="Continuous Long Reads RNA-Seq") + + _defn = EnumDefinition( + name="EnumExperimentalStrategy", + description="Types of sequencing methods.", + ) + +class EnumAssayCenter(EnumDefinitionImpl): + """ + Organizations or centers producing raw or harmonized sequencing files. + """ + broad = PermissibleValue( + text="broad", + title="The Broad Institute") + hudsonalpha = PermissibleValue( + text="hudsonalpha", + title="HudsonAlpha Institute for Biotechnology") + stjude = PermissibleValue( + text="stjude", + title="St. 
Jude") + baylor = PermissibleValue( + text="baylor", + title="Baylor College of Medicine") + chop = PermissibleValue( + text="chop", + title="The Children's Hospital of Philadelphia") + other = PermissibleValue( + text="other", + title="Other") + unknown = PermissibleValue( + text="unknown", + title="Unknown") + + _defn = EnumDefinition( + name="EnumAssayCenter", + description="Organizations or centers producing raw or harmonized sequencing files.", + ) + +class EnumRepository(EnumDefinitionImpl): + """ + specific drs service used for registration + """ + cavatica = PermissibleValue( + text="cavatica", + title="Cavatica DRS") + dcf = PermissibleValue( + text="dcf", + title="NCI DCF") + other = PermissibleValue( + text="other", + title="Other") + + _defn = EnumDefinition( + name="EnumRepository", + description="specific drs service used for registration", + ) + +class EnumPlatform(EnumDefinitionImpl): + """ + names of instrument or platforms used for assay data generation + """ + illumina = PermissibleValue( + text="illumina", + title="Illumina") + pacbio = PermissibleValue( + text="pacbio", + title="PacBio") + ont = PermissibleValue( + text="ont", + title="ONT") + illumina_epic = PermissibleValue( + text="illumina_epic", + title="Illumina Infinium HumanMethylationEPICv2") + other = PermissibleValue( + text="other", + title="Other") + unknown = PermissibleValue( + text="unknown", + title="Unknown") + + _defn = EnumDefinition( + name="EnumPlatform", + description="names of instrument or platforms used for assay data generation", + ) + # Slots class slots: pass @@ -1820,16 +2335,16 @@ class slots: model_uri=INCLUDEDCC.file_id, domain=None, range=Optional[Union[str, FileFileId]]) slots.filename = Slot(uri=INCLUDEDCC.filename, name="filename", curie=INCLUDEDCC.curie('filename'), - model_uri=INCLUDEDCC.filename, domain=None, range=Optional[str]) + model_uri=INCLUDEDCC.filename, domain=None, range=str) slots.format = Slot(uri=INCLUDEDCC.format, name="format", 
curie=INCLUDEDCC.curie('format'), - model_uri=INCLUDEDCC.format, domain=None, range=Optional[Union[str, "EnumEDAMFormats"]]) + model_uri=INCLUDEDCC.format, domain=None, range=Union[str, "EnumEDAMFormats"]) slots.data_type = Slot(uri=INCLUDEDCC.data_type, name="data_type", curie=INCLUDEDCC.curie('data_type'), - model_uri=INCLUDEDCC.data_type, domain=None, range=Optional[Union[str, "EnumEDAMDataTypes"]]) + model_uri=INCLUDEDCC.data_type, domain=None, range=Union[str, "EnumEDAMDataTypes"]) slots.size = Slot(uri=INCLUDEDCC.size, name="size", curie=INCLUDEDCC.curie('size'), - model_uri=INCLUDEDCC.size, domain=None, range=Optional[int]) + model_uri=INCLUDEDCC.size, domain=None, range=int) slots.staging_url = Slot(uri=INCLUDEDCC.staging_url, name="staging_url", curie=INCLUDEDCC.curie('staging_url'), model_uri=INCLUDEDCC.staging_url, domain=None, range=Optional[Union[str, URIorCURIE]]) @@ -1841,13 +2356,13 @@ class slots: model_uri=INCLUDEDCC.drs_uri, domain=None, range=Optional[Union[str, URIorCURIE]]) slots.hash = Slot(uri=INCLUDEDCC.hash, name="hash", curie=INCLUDEDCC.curie('hash'), - model_uri=INCLUDEDCC.hash, domain=None, range=Optional[Union[dict, FileHash]]) + model_uri=INCLUDEDCC.hash, domain=None, range=Union[dict, FileHash]) slots.hash_type = Slot(uri=INCLUDEDCC.hash_type, name="hash_type", curie=INCLUDEDCC.curie('hash_type'), - model_uri=INCLUDEDCC.hash_type, domain=None, range=Optional[Union[str, "EnumFileHashType"]]) + model_uri=INCLUDEDCC.hash_type, domain=None, range=Union[str, "EnumFileHashType"]) slots.hash_value = Slot(uri=INCLUDEDCC.hash_value, name="hash_value", curie=INCLUDEDCC.curie('hash_value'), - model_uri=INCLUDEDCC.hash_value, domain=None, range=Optional[str]) + model_uri=INCLUDEDCC.hash_value, domain=None, range=str) slots.dataset_id = Slot(uri=INCLUDEDCC.dataset_id, name="dataset_id", curie=INCLUDEDCC.curie('dataset_id'), model_uri=INCLUDEDCC.dataset_id, domain=None, range=Optional[Union[str, DatasetDatasetId]]) @@ -1858,6 +2373,117 @@ class 
slots: slots.data_collection_end = Slot(uri=INCLUDEDCC.data_collection_end, name="data_collection_end", curie=INCLUDEDCC.curie('data_collection_end'), model_uri=INCLUDEDCC.data_collection_end, domain=None, range=Optional[str]) +slots.s3_file_path = Slot(uri=INCLUDEDCC.s3_file_path, name="s3_file_path", curie=INCLUDEDCC.curie('s3_file_path'), + model_uri=INCLUDEDCC.s3_file_path, domain=None, range=str) + +slots.is_released = Slot(uri=INCLUDEDCC.is_released, name="is_released", curie=INCLUDEDCC.curie('is_released'), + model_uri=INCLUDEDCC.is_released, domain=None, range=Union[bool, Bool]) + +slots.is_registered = Slot(uri=INCLUDEDCC.is_registered, name="is_registered", curie=INCLUDEDCC.curie('is_registered'), + model_uri=INCLUDEDCC.is_registered, domain=None, range=Union[bool, Bool]) + +slots.repository = Slot(uri=INCLUDEDCC.repository, name="repository", curie=INCLUDEDCC.curie('repository'), + model_uri=INCLUDEDCC.repository, domain=None, range=Optional[Union[str, "EnumRepository"]]) + +slots.file_category = Slot(uri=INCLUDEDCC.file_category, name="file_category", curie=INCLUDEDCC.curie('file_category'), + model_uri=INCLUDEDCC.file_category, domain=None, range=str) + +slots.s3_key = Slot(uri=INCLUDEDCC.s3_key, name="s3_key", curie=INCLUDEDCC.curie('s3_key'), + model_uri=INCLUDEDCC.s3_key, domain=None, range=str) + +slots.file_extension = Slot(uri=INCLUDEDCC.file_extension, name="file_extension", curie=INCLUDEDCC.curie('file_extension'), + model_uri=INCLUDEDCC.file_extension, domain=None, range=str) + +slots.data_transfer_id = Slot(uri=INCLUDEDCC.data_transfer_id, name="data_transfer_id", curie=INCLUDEDCC.curie('data_transfer_id'), + model_uri=INCLUDEDCC.data_transfer_id, domain=None, range=Optional[str]) + +slots.aws_account_id = Slot(uri=INCLUDEDCC.aws_account_id, name="aws_account_id", curie=INCLUDEDCC.curie('aws_account_id'), + model_uri=INCLUDEDCC.aws_account_id, domain=None, range=str) + +slots.account_name = Slot(uri=INCLUDEDCC.account_name, 
name="account_name", curie=INCLUDEDCC.curie('account_name'), + model_uri=INCLUDEDCC.account_name, domain=None, range=str) + +slots.account_alias = Slot(uri=INCLUDEDCC.account_alias, name="account_alias", curie=INCLUDEDCC.curie('account_alias'), + model_uri=INCLUDEDCC.account_alias, domain=None, range=str) + +slots.bucket_study_id = Slot(uri=INCLUDEDCC.bucket_study_id, name="bucket_study_id", curie=INCLUDEDCC.curie('bucket_study_id'), + model_uri=INCLUDEDCC.bucket_study_id, domain=None, range=str) + +slots.bucket = Slot(uri=INCLUDEDCC.bucket, name="bucket", curie=INCLUDEDCC.curie('bucket'), + model_uri=INCLUDEDCC.bucket, domain=None, range=str) + +slots.s3_created_at = Slot(uri=INCLUDEDCC.s3_created_at, name="s3_created_at", curie=INCLUDEDCC.curie('s3_created_at'), + model_uri=INCLUDEDCC.s3_created_at, domain=None, range=Union[str, XSDDateTime]) + +slots.s3_modified_at = Slot(uri=INCLUDEDCC.s3_modified_at, name="s3_modified_at", curie=INCLUDEDCC.curie('s3_modified_at'), + model_uri=INCLUDEDCC.s3_modified_at, domain=None, range=Union[str, XSDDateTime]) + +slots.intelligent_tiering_access_tier = Slot(uri=INCLUDEDCC.intelligent_tiering_access_tier, name="intelligent_tiering_access_tier", curie=INCLUDEDCC.curie('intelligent_tiering_access_tier'), + model_uri=INCLUDEDCC.intelligent_tiering_access_tier, domain=None, range=str) + +slots.is_delete_marker = Slot(uri=INCLUDEDCC.is_delete_marker, name="is_delete_marker", curie=INCLUDEDCC.curie('is_delete_marker'), + model_uri=INCLUDEDCC.is_delete_marker, domain=None, range=Union[bool, Bool]) + +slots.is_latest = Slot(uri=INCLUDEDCC.is_latest, name="is_latest", curie=INCLUDEDCC.curie('is_latest'), + model_uri=INCLUDEDCC.is_latest, domain=None, range=Union[bool, Bool]) + +slots.storage_class = Slot(uri=INCLUDEDCC.storage_class, name="storage_class", curie=INCLUDEDCC.curie('storage_class'), + model_uri=INCLUDEDCC.storage_class, domain=None, range=str) + +slots.manifest_hash_value = Slot(uri=INCLUDEDCC.manifest_hash_value, 
name="manifest_hash_value", curie=INCLUDEDCC.curie('manifest_hash_value'), + model_uri=INCLUDEDCC.manifest_hash_value, domain=None, range=Optional[str]) + +slots.file_hash_validation_status = Slot(uri=INCLUDEDCC.file_hash_validation_status, name="file_hash_validation_status", curie=INCLUDEDCC.curie('file_hash_validation_status'), + model_uri=INCLUDEDCC.file_hash_validation_status, domain=None, range=Optional[str]) + +slots.file_type = Slot(uri=INCLUDEDCC.file_type, name="file_type", curie=INCLUDEDCC.curie('file_type'), + model_uri=INCLUDEDCC.file_type, domain=None, range=str) + +slots.encryption_status = Slot(uri=INCLUDEDCC.encryption_status, name="encryption_status", curie=INCLUDEDCC.curie('encryption_status'), + model_uri=INCLUDEDCC.encryption_status, domain=None, range=str) + +slots.is_multipart_uploaded = Slot(uri=INCLUDEDCC.is_multipart_uploaded, name="is_multipart_uploaded", curie=INCLUDEDCC.curie('is_multipart_uploaded'), + model_uri=INCLUDEDCC.is_multipart_uploaded, domain=None, range=str) + +slots.object_lock_level_hold_status = Slot(uri=INCLUDEDCC.object_lock_level_hold_status, name="object_lock_level_hold_status", curie=INCLUDEDCC.curie('object_lock_level_hold_status'), + model_uri=INCLUDEDCC.object_lock_level_hold_status, domain=None, range=str) + +slots.object_lock_mode = Slot(uri=INCLUDEDCC.object_lock_mode, name="object_lock_mode", curie=INCLUDEDCC.curie('object_lock_mode'), + model_uri=INCLUDEDCC.object_lock_mode, domain=None, range=str) + +slots.replication_status = Slot(uri=INCLUDEDCC.replication_status, name="replication_status", curie=INCLUDEDCC.curie('replication_status'), + model_uri=INCLUDEDCC.replication_status, domain=None, range=str) + +slots.version_id = Slot(uri=INCLUDEDCC.version_id, name="version_id", curie=INCLUDEDCC.curie('version_id'), + model_uri=INCLUDEDCC.version_id, domain=None, range=str) + +slots.access_type = Slot(uri=INCLUDEDCC.access_type, name="access_type", curie=INCLUDEDCC.curie('access_type'), + 
model_uri=INCLUDEDCC.access_type, domain=None, range=Union[str, "EnumAccessType"]) + +slots.access_url = Slot(uri=INCLUDEDCC.access_url, name="access_url", curie=INCLUDEDCC.curie('access_url'), + model_uri=INCLUDEDCC.access_url, domain=None, range=Optional[str]) + +slots.acl = Slot(uri=INCLUDEDCC.acl, name="acl", curie=INCLUDEDCC.curie('acl'), + model_uri=INCLUDEDCC.acl, domain=None, range=str) + +slots.experimental_strategy = Slot(uri=INCLUDEDCC.experimental_strategy, name="experimental_strategy", curie=INCLUDEDCC.curie('experimental_strategy'), + model_uri=INCLUDEDCC.experimental_strategy, domain=None, range=Union[str, "EnumExperimentalStrategy"]) + +slots.assay_center = Slot(uri=INCLUDEDCC.assay_center, name="assay_center", curie=INCLUDEDCC.curie('assay_center'), + model_uri=INCLUDEDCC.assay_center, domain=None, range=Optional[Union[str, "EnumAssayCenter"]]) + +slots.platform = Slot(uri=INCLUDEDCC.platform, name="platform", curie=INCLUDEDCC.curie('platform'), + model_uri=INCLUDEDCC.platform, domain=None, range=Union[str, "EnumPlatform"]) + +slots.workflow_name = Slot(uri=INCLUDEDCC.workflow_name, name="workflow_name", curie=INCLUDEDCC.curie('workflow_name'), + model_uri=INCLUDEDCC.workflow_name, domain=None, range=Optional[str]) + +slots.workflow_version = Slot(uri=INCLUDEDCC.workflow_version, name="workflow_version", curie=INCLUDEDCC.curie('workflow_version'), + model_uri=INCLUDEDCC.workflow_version, domain=None, range=Optional[str]) + +slots.object_lock_retain_until_date = Slot(uri=INCLUDEDCC.object_lock_retain_until_date, name="object_lock_retain_until_date", curie=INCLUDEDCC.curie('object_lock_retain_until_date'), + model_uri=INCLUDEDCC.object_lock_retain_until_date, domain=None, range=Union[str, XSDDateTime]) + slots.Study_study_id = Slot(uri=INCLUDEDCC.study_id, name="Study_study_id", curie=INCLUDEDCC.curie('study_id'), model_uri=INCLUDEDCC.Study_study_id, domain=Study, range=Union[str, StudyStudyId]) @@ -1906,17 +2532,41 @@ class slots: 
slots.ActivityDefinition_activity_definition_id = Slot(uri=INCLUDEDCC.activity_definition_id, name="ActivityDefinition_activity_definition_id", curie=INCLUDEDCC.curie('activity_definition_id'), model_uri=INCLUDEDCC.ActivityDefinition_activity_definition_id, domain=ActivityDefinition, range=Union[str, ActivityDefinitionActivityDefinitionId]) +slots.File_study_id = Slot(uri=INCLUDEDCC.study_id, name="File_study_id", curie=INCLUDEDCC.curie('study_id'), + model_uri=INCLUDEDCC.File_study_id, domain=File, range=Union[str, StudyStudyId]) + slots.File_file_id = Slot(uri=INCLUDEDCC.file_id, name="File_file_id", curie=INCLUDEDCC.curie('file_id'), model_uri=INCLUDEDCC.File_file_id, domain=File, range=Union[str, FileFileId]) slots.File_subject_id = Slot(uri=INCLUDEDCC.subject_id, name="File_subject_id", curie=INCLUDEDCC.curie('subject_id'), - model_uri=INCLUDEDCC.File_subject_id, domain=File, range=Optional[Union[Union[str, SubjectSubjectId], list[Union[str, SubjectSubjectId]]]]) + model_uri=INCLUDEDCC.File_subject_id, domain=File, range=Union[Union[str, SubjectSubjectId], list[Union[str, SubjectSubjectId]]]) slots.File_sample_id = Slot(uri=INCLUDEDCC.sample_id, name="File_sample_id", curie=INCLUDEDCC.curie('sample_id'), - model_uri=INCLUDEDCC.File_sample_id, domain=File, range=Optional[Union[Union[str, SampleSampleId], list[Union[str, SampleSampleId]]]]) + model_uri=INCLUDEDCC.File_sample_id, domain=File, range=Union[Union[str, SampleSampleId], list[Union[str, SampleSampleId]]]) + +slots.File_data_category = Slot(uri=INCLUDEDCC.data_category, name="File_data_category", curie=INCLUDEDCC.curie('data_category'), + model_uri=INCLUDEDCC.File_data_category, domain=File, range=Union[str, "EnumDataCategory"]) slots.Dataset_dataset_id = Slot(uri=INCLUDEDCC.dataset_id, name="Dataset_dataset_id", curie=INCLUDEDCC.curie('dataset_id'), model_uri=INCLUDEDCC.Dataset_dataset_id, domain=Dataset, range=Union[str, DatasetDatasetId]) slots.Dataset_file_id = Slot(uri=INCLUDEDCC.file_id, 
name="Dataset_file_id", curie=INCLUDEDCC.curie('file_id'), model_uri=INCLUDEDCC.Dataset_file_id, domain=Dataset, range=Optional[Union[Union[str, FileFileId], list[Union[str, FileFileId]]]]) + +slots.FileAdmin_study_id = Slot(uri=INCLUDEDCC.study_id, name="FileAdmin_study_id", curie=INCLUDEDCC.curie('study_id'), + model_uri=INCLUDEDCC.FileAdmin_study_id, domain=FileAdmin, range=Union[str, StudyStudyId]) + +slots.FileAdmin_file_id = Slot(uri=INCLUDEDCC.file_id, name="FileAdmin_file_id", curie=INCLUDEDCC.curie('file_id'), + model_uri=INCLUDEDCC.FileAdmin_file_id, domain=FileAdmin, range=Union[str, FileAdminFileId]) + +slots.FileAssay_file_id = Slot(uri=INCLUDEDCC.file_id, name="FileAssay_file_id", curie=INCLUDEDCC.curie('file_id'), + model_uri=INCLUDEDCC.FileAssay_file_id, domain=FileAssay, range=Union[str, FileAssayFileId]) + +slots.FileAssay_subject_id = Slot(uri=INCLUDEDCC.subject_id, name="FileAssay_subject_id", curie=INCLUDEDCC.curie('subject_id'), + model_uri=INCLUDEDCC.FileAssay_subject_id, domain=FileAssay, range=Union[Union[str, SubjectSubjectId], list[Union[str, SubjectSubjectId]]]) + +slots.FileAssay_sample_id = Slot(uri=INCLUDEDCC.sample_id, name="FileAssay_sample_id", curie=INCLUDEDCC.curie('sample_id'), + model_uri=INCLUDEDCC.FileAssay_sample_id, domain=FileAssay, range=Union[Union[str, SampleSampleId], list[Union[str, SampleSampleId]]]) + +slots.FileAssay_data_category = Slot(uri=INCLUDEDCC.data_category, name="FileAssay_data_category", curie=INCLUDEDCC.curie('data_category'), + model_uri=INCLUDEDCC.FileAssay_data_category, domain=FileAssay, range=Union[str, "EnumDataCategory"]) diff --git a/src/include_access_model/datamodel/include_access_model_pydantic.py b/src/include_access_model/datamodel/include_access_model_pydantic.py index 5fef463..4c60f27 100644 --- a/src/include_access_model/datamodel/include_access_model_pydantic.py +++ b/src/include_access_model/datamodel/include_access_model_pydantic.py @@ -424,6 +424,67 @@ class EnumFileHashType(str, 
Enum): SHA_1 = "sha1" +class EnumAccessType(str, Enum): + """ + Types of file access levels. + """ + Open_Access = "open" + Controlled_Access = "controlled" + Registered_tier_Access = "registered" + + +class EnumExperimentalStrategy(str, Enum): + """ + Types of sequencing methods. + """ + Whole_Genome_Sequencing = "wgs" + RNA_Seq = "rnaseq" + Whole_Exome_Sequencing = "wxs" + Methylation = "methlyation" + Continuous_Long_Reads_WGS = "clr_wgs" + Proteomics = "proteomics" + Targeted_Sequencing = "targeted_seq" + Circular_Consensus_Sequencing_WGS = "ccs_wgs" + Panel = "panel" + Circular_Consensus_Sequencing_RNA_Se = "ccs_rnaseq" + Oxford_Nanopore_Technologies_WGS = "ont_wgs" + Continuous_Long_Reads_RNA_Seq = "clr_rnaseq" + + +class EnumAssayCenter(str, Enum): + """ + Organizations or centers producing raw or harmonized sequencing files. + """ + The_Broad_Institute = "broad" + HudsonAlpha_Institute_for_Biotechnology = "hudsonalpha" + StFULL_STOP_Jude = "stjude" + Baylor_College_of_Medicine = "baylor" + The_ChildrenAPOSTROPHEs_Hospital_of_Philadelphia = "chop" + Other = "other" + Unknown = "unknown" + + +class EnumRepository(str, Enum): + """ + specific drs service used for registration + """ + Cavatica_DRS = "cavatica" + NCI_DCF = "dcf" + Other = "other" + + +class EnumPlatform(str, Enum): + """ + names of instrument or platforms used for assay data generation + """ + Illumina = "illumina" + PacBio = "pacbio" + ONT = "ont" + Illumina_Infinium_HumanMethylationEPICv2 = "illumina_epic" + Other = "other" + Unknown = "unknown" + + class Record(ConfiguredBaseModel): """ @@ -447,7 +508,7 @@ class Study(Record): 'required': True}}, 'title': 'Research Study'}) - study_id: str = Field(default=..., title="Study ID", description="""INCLUDE Global ID for the study""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'StudyMetadata']} }) + study_id: str = Field(default=..., title="Study ID", description="""INCLUDE Global ID for the study""", json_schema_extra = { 
"linkml_meta": {'domain_of': ['Study', 'StudyMetadata', 'File', 'FileAdmin']} }) parent_study: Optional[str] = Field(default=None, title="Parent Study", description="""The parent study for this study, if it is a nested study.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) study_title: str = Field(default=..., description="""Full Study Title""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) study_code: str = Field(default=..., title="Study Code", description="""Unique identifier for the study (generally a short acronym)""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study']} }) @@ -478,12 +539,12 @@ class StudyMetadata(Record): 'required': True}}, 'title': 'Study Metadata'}) - study_id: str = Field(default=..., title="Study ID", description="""INCLUDE Global ID for the study""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'StudyMetadata']} }) + study_id: str = Field(default=..., title="Study ID", description="""INCLUDE Global ID for the study""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'StudyMetadata', 'File', 'FileAdmin']} }) participant_lifespan_stage: list[EnumParticipantLifespanStage] = Field(default=..., title="Participant Lifespan Stage", description="""Focus age group(s) of the study population""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) selection_criteria: Optional[str] = Field(default=None, title="Selection Criteria", description="""Brief description of inclusion and/or exclusion criteria for the study""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) study_design: list[EnumStudyDesign] = Field(default=..., title="Study Design", description="""Overall design of study, including whether it is longitudinal and whether family members/unrelated controls are also enrolled""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) clinical_data_source_type: list[EnumClinicalDataSourceType] = 
Field(default=..., title="Clinical Data Source Type", description="""Source(s) of data collected from study participants""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) - data_category: list[EnumDataCategory] = Field(default=..., title="Data Category", description="""General category of data in this Record (e.g. Clinical, Genomics, etc)""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata', 'File']} }) + data_category: list[EnumDataCategory] = Field(default=..., title="Data Category", description="""General category of data in this Record (e.g. Clinical, Genomics, etc)""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata', 'File', 'FileAssay']} }) vbr: Optional[VirtualBiorepository] = Field(default=None, title="Virtual Biorepository", description="""Information about the study's Virtual Biorepository, if participating""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) research_domain: list[EnumResearchDomain] = Field(default=..., description="""Main research domain(s) of the study, other than Down syndrome""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) expected_number_of_participants: int = Field(default=..., title="Expected Number of Participants", description="""Total expected number of participants to be recruited.""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata']} }) @@ -571,7 +632,9 @@ class Subject(Record): 'Demographics', 'SubjectAssertion', 'Encounter', - 'File']} }) + 'File', + 'FileAdmin', + 'FileAssay']} }) subject_type: EnumSubjectType = Field(default=..., title="Subject Type", description="""Type of entity this record represents""", json_schema_extra = { "linkml_meta": {'domain_of': ['Subject']} }) organism_type: Optional[str] = Field(default=None, title="Organism Type", description="""Organism Type""", json_schema_extra = { "linkml_meta": {'domain_of': ['Subject']} }) external_id: Optional[list[str]] = 
Field(default=[], title="External Identifiers", description="""Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP""", json_schema_extra = { "linkml_meta": {'domain_of': ['Record']} }) @@ -591,7 +654,9 @@ class Demographics(Record): 'Demographics', 'SubjectAssertion', 'Encounter', - 'File']} }) + 'File', + 'FileAdmin', + 'FileAssay']} }) sex: EnumSex = Field(default=..., title="Sex", description="""Sex of Participant""", json_schema_extra = { "linkml_meta": {'domain_of': ['Demographics']} }) race: list[EnumRace] = Field(default=..., title="Race", description="""Race of Participant""", json_schema_extra = { "linkml_meta": {'domain_of': ['Demographics']} }) ethnicity: EnumEthnicity = Field(default=..., title="Ethnicity", description="""Ethnicity of Participant""", json_schema_extra = { "linkml_meta": {'domain_of': ['Demographics']} }) @@ -618,7 +683,9 @@ class SubjectAssertion(Record): 'Demographics', 'SubjectAssertion', 'Encounter', - 'File']} }) + 'File', + 'FileAdmin', + 'FileAssay']} }) encounter_id: Optional[str] = Field(default=None, title="Encounter ID", description="""Unique identifier for this Encounter.""", json_schema_extra = { "linkml_meta": {'domain_of': ['SubjectAssertion', 'BiospecimenCollection', 'Encounter']} }) assertion_provenance: Optional[EnumAssertionProvenance] = Field(default=None, title="Assertion Provenance", description="""The original source of this assertion""", json_schema_extra = { "linkml_meta": {'domain_of': ['SubjectAssertion']} }) age_at_assertion: Optional[int] = Field(default=None, title="Age at assertion", description="""The age in days of the Subject when the assertion was made.""", json_schema_extra = { "linkml_meta": {'domain_of': ['SubjectAssertion'], 'unit': {'ucum_code': 'd'}} }) @@ -665,7 +732,7 @@ class Sample(Record): 'required': True}}, 'title': 'Sample'}) - sample_id: str = Field(default=..., title="Sample ID", description="""The unique identifier for this Sample.""", 
json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot', 'File']} }) + sample_id: str = Field(default=..., title="Sample ID", description="""The unique identifier for this Sample.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot', 'File', 'FileAdmin', 'FileAssay']} }) biospecimen_collection_id: Optional[str] = Field(default=None, title="Biospecimen Collection ID", description="""Biospecimen Collection during which this sample was generated.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'BiospecimenCollection']} }) parent_sample_id: Optional[str] = Field(default=None, title="Parent Sample ID", description="""Sample from which this sample is derived""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample']} }) sample_type: str = Field(default=..., title="Sample Type", description="""Type of material of which this Sample is comprised""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample']} }) @@ -710,7 +777,7 @@ class Aliquot(Record): 'title': 'Aliquot'}) aliquot_id: str = Field(default=..., title="Aliquot ID", description="""Unique identifier for an Aliquot.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Aliquot']} }) - sample_id: Optional[str] = Field(default=None, title="Sample ID", description="""The unique identifier for this Sample.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot', 'File']} }) + sample_id: Optional[str] = Field(default=None, title="Sample ID", description="""The unique identifier for this Sample.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot', 'File', 'FileAdmin', 'FileAssay']} }) availablity_status: Optional[EnumAvailabilityStatus] = Field(default=None, title="Sample Availability", description="""Can this Sample be requested for further analysis?""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot']} }) quantity_number: Optional[float] = Field(default=None, title="Quantity", 
description="""The total quantity of the specimen""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot']} }) quantity_unit: Optional[str] = Field(default=None, title="Quantity Units", description="""The structured term defining the units of the quantity.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot']} }) @@ -735,7 +802,9 @@ class Encounter(Record): 'Demographics', 'SubjectAssertion', 'Encounter', - 'File']} }) + 'File', + 'FileAdmin', + 'FileAssay']} }) encounter_definition_id: Optional[str] = Field(default=None, title="Encounter Definition ID", description="""Unique identifier for this Encounter Definition.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Encounter', 'EncounterDefinition']} }) age_at_event: Optional[int] = Field(default=None, title="Age at event", description="""The age in days of the Subject at the time point which the assertion describes, eg, age of onset or when a measurement was performed.""", json_schema_extra = { "linkml_meta": {'domain_of': ['SubjectAssertion', 'Encounter'], 'unit': {'ucum_code': 'd'}} }) external_id: Optional[list[str]] = Field(default=[], title="External Identifiers", description="""Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP""", json_schema_extra = { "linkml_meta": {'domain_of': ['Record']} }) @@ -788,33 +857,43 @@ class ActivityDefinition(Record): class File(Record): """ - File + Required information for portal use. 
""" linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://includedcc.org/include-access-model', - 'slot_usage': {'file_id': {'identifier': True, + 'slot_usage': {'data_category': {'name': 'data_category', 'required': True}, + 'file_id': {'identifier': True, 'name': 'file_id', 'range': 'string', 'required': True}, - 'sample_id': {'multivalued': True, 'name': 'sample_id'}, - 'subject_id': {'multivalued': True, 'name': 'subject_id'}}, + 'sample_id': {'multivalued': True, + 'name': 'sample_id', + 'required': True}, + 'study_id': {'name': 'study_id', 'required': True}, + 'subject_id': {'multivalued': True, + 'name': 'subject_id', + 'required': True}}, 'title': 'File'}) - file_id: str = Field(default=..., title="File ID", description="""Unique identifier for this File.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'Dataset']} }) - subject_id: Optional[list[str]] = Field(default=[], title="Study ID", description="""INCLUDE Global ID for the Subject""", json_schema_extra = { "linkml_meta": {'domain_of': ['Subject', + study_id: str = Field(default=..., title="Study ID", description="""INCLUDE Global ID for the study""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'StudyMetadata', 'File', 'FileAdmin']} }) + file_id: str = Field(default=..., title="File ID", description="""Unique identifier for this File.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'Dataset', 'FileAdmin', 'FileAssay']} }) + subject_id: list[str] = Field(default=..., title="Study ID", description="""INCLUDE Global ID for the Subject""", json_schema_extra = { "linkml_meta": {'domain_of': ['Subject', 'Demographics', 'SubjectAssertion', 'Encounter', - 'File']} }) - sample_id: Optional[list[str]] = Field(default=[], title="Sample ID", description="""The unique identifier for this Sample.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot', 'File']} }) - filename: Optional[str] = Field(default=None, title="Filename", 
description="""The name of the file.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File']} }) - format: Optional[EnumEDAMFormats] = Field(default=None, title="File Format", description="""The format of the file.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File']} }) - data_category: Optional[EnumDataCategory] = Field(default=None, title="Data Category", description="""General category of data in this Record (e.g. Clinical, Genomics, etc)""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata', 'File']} }) - data_type: Optional[EnumEDAMDataTypes] = Field(default=None, title="Data Type", description="""The type of data within this file.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File']} }) - size: Optional[int] = Field(default=None, title="File Size", description="""Size of the file, in Bytes.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File'], 'unit': {'ucum_code': 'By'}} }) - staging_url: Optional[str] = Field(default=None, title="Staging Location", description="""URL for internal access to the data. 
May be temporary.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File']} }) - release_url: Optional[str] = Field(default=None, title="Release Location", description="""URL for controlled or open access to the data.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File']} }) - drs_uri: Optional[str] = Field(default=None, title="DRS URI", description="""DRS location to access the data.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File']} }) - hash: Optional[FileHash] = Field(default=None, title="File Hash", description="""File hash information""", json_schema_extra = { "linkml_meta": {'domain_of': ['File']} }) + 'File', + 'FileAdmin', + 'FileAssay']} }) + sample_id: list[str] = Field(default=..., title="Sample ID", description="""The unique identifier for this Sample.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot', 'File', 'FileAdmin', 'FileAssay']} }) + s3_file_path: str = Field(default=..., title="S3 File Path", description="""The full s3 url of a file's location in aws""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAdmin']} }) + filename: str = Field(default=..., title="Filename", description="""The name of the file.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File']} }) + size: int = Field(default=..., title="File Size", description="""Size of the file, in Bytes.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAdmin', 'FileAssay'], 'unit': {'ucum_code': 'By'}} }) + format: EnumEDAMFormats = Field(default=..., title="File Format", description="""The format of the file.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAssay']} }) + data_category: EnumDataCategory = Field(default=..., title="Data Category", description="""General category of data in this Record (e.g. 
Clinical, Genomics, etc)""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata', 'File', 'FileAssay']} }) + data_type: EnumEDAMDataTypes = Field(default=..., title="Data Type", description="""The type of data within this file.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAssay']} }) + staging_url: Optional[str] = Field(default=None, title="Staging Location", description="""URL for internal access to the data. May be temporary.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAdmin']} }) + release_url: Optional[str] = Field(default=None, title="Release Location", description="""URL for controlled or open access to the data.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAdmin']} }) + drs_uri: Optional[str] = Field(default=None, title="DRS URI", description="""DRS location to access the data.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAdmin']} }) + hash: FileHash = Field(default=..., title="File Hash", description="""File hash information""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAdmin']} }) external_id: Optional[list[str]] = Field(default=[], title="External Identifiers", description="""Other identifiers for this entity, eg, from the submitting study or in systems like dbGaP""", json_schema_extra = { "linkml_meta": {'domain_of': ['Record']} }) @@ -825,8 +904,8 @@ class FileHash(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://includedcc.org/include-access-model', 'title': 'File Hash'}) - hash_type: Optional[EnumFileHashType] = Field(default=None, title="File Hash Type", description="""The type of file hash, eg, md5""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileHash']} }) - hash_value: Optional[str] = Field(default=None, title="File Hash Value", description="""The value of the file hash""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileHash']} }) + hash_type: 
EnumFileHashType = Field(default=..., title="File Hash Type", description="""The type of file hash, eg, md5""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileHash']} }) + hash_value: str = Field(default=..., title="File Hash Value", description="""The value of the file hash""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileHash']} }) class Dataset(ConfiguredBaseModel): @@ -852,12 +931,110 @@ class Dataset(ConfiguredBaseModel): 'Dataset']} }) description: Optional[str] = Field(default=None, title="Description", description="""Description for this entity.""", json_schema_extra = { "linkml_meta": {'domain_of': ['EncounterDefinition', 'ActivityDefinition', 'Dataset']} }) do_id: Optional[str] = Field(default=None, title="DOI", description="""Digital Object Identifier (DOI) for this Record.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'DOI', 'Dataset']} }) - file_id: Optional[list[str]] = Field(default=[], title="File ID", description="""The list of files comprising this dataset.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'Dataset']} }) + file_id: Optional[list[str]] = Field(default=[], title="File ID", description="""The list of files comprising this dataset.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'Dataset', 'FileAdmin', 'FileAssay']} }) publication: Optional[list[Publication]] = Field(default=[], title="Publication", description="""Publications associated with this Record.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'Dataset']} }) data_collection_start: Optional[str] = Field(default=None, title="Data Collection Start", description="""The date that data collection started. May include only a year.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Dataset']} }) data_collection_end: Optional[str] = Field(default=None, title="Data Collection End", description="""The date that data collection started. 
May include only a year.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Dataset']} }) +class FileAdmin(ConfiguredBaseModel): + """ + File unvierse; contains all information about a file that may be needed for operational work + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://includedcc.org/include-access-model', + 'slot_usage': {'file_id': {'identifier': True, + 'name': 'file_id', + 'required': True}, + 'study_id': {'name': 'study_id', 'required': True}}}) + + study_id: str = Field(default=..., title="Study ID", description="""INCLUDE Global ID for the study""", json_schema_extra = { "linkml_meta": {'domain_of': ['Study', 'StudyMetadata', 'File', 'FileAdmin']} }) + file_id: str = Field(default=..., title="File ID", description="""Unique identifier for this File.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'Dataset', 'FileAdmin', 'FileAssay']} }) + subject_id: Optional[str] = Field(default=None, title="Study ID", description="""INCLUDE Global ID for the Subject""", json_schema_extra = { "linkml_meta": {'domain_of': ['Subject', + 'Demographics', + 'SubjectAssertion', + 'Encounter', + 'File', + 'FileAdmin', + 'FileAssay']} }) + sample_id: Optional[str] = Field(default=None, title="Sample ID", description="""The unique identifier for this Sample.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot', 'File', 'FileAdmin', 'FileAssay']} }) + s3_file_path: str = Field(default=..., title="S3 File Path", description="""The full s3 url of a file's location in aws""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAdmin']} }) + file_category: str = Field(default=..., title="File Category", description="""A high level classification of the file used for operations.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + size: int = Field(default=..., title="File Size", description="""Size of the file, in Bytes.""", json_schema_extra = { "linkml_meta": 
{'domain_of': ['File', 'FileAdmin', 'FileAssay'], 'unit': {'ucum_code': 'By'}} }) + s3_key: str = Field(default=..., title="S3 Key", description="""The unique identifier for an object within a bucket""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + file_extension: str = Field(default=..., title="File Extension", description="""A 3-4 letter code at the end of a filename that identifies the file format.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + data_transfer_id: Optional[str] = Field(default=None, title="Data Transfer ID", description="""A jira ticket number associated with a file transfer request to production bucket""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + aws_account_id: str = Field(default=..., title="AWS Account ID", description="""A 12-digit number that uniquely identifies a specific AWS account""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + account_name: str = Field(default=..., title="AWS Account Name", description="""A user-defined label used to define an AWS account.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + account_alias: str = Field(default=..., title="Account Alias", description="""A unique user-defined string that replaces the AWS Account ID in the IAM user sign-in URL""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + bucket_study_id: str = Field(default=..., title="Bucket Study ID", description="""The global study ID used to create the bucket""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + bucket: str = Field(default=..., title="Bucket", description="""Cloud storage container in AWS used to manage and store s3 objects""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + s3_created_at: datetime = Field(default=..., title="S3 Created At", description="""Timestamp of when a file was uploaded to an s3 bucket.""", json_schema_extra =
{ "linkml_meta": {'domain_of': ['FileAdmin']} }) + s3_modified_at: datetime = Field(default=..., title="S3 Modified At", description="""Timestamp of when a file was modified in an s3 bucket.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + intelligent_tiering_access_tier: str = Field(default=..., title="Intelligent Tiering Access Tier", description="""Storage access tier assigned by AWS intelliegnt tiering, indicating the current access frequency classification of the object""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + is_delete_marker: bool = Field(default=..., title="Is Delete Marker", description="""A flag that notes whether a file has been deleted from s3""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + is_latest: bool = Field(default=..., title="Is Latest", description="""Specifies whether an object version is the most recent version of that object""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + storage_class: str = Field(default=..., title="Storage Class", description="""Storage tier of the object in AWS reflecting cost and access characteristics.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + manifest_hash_value: Optional[str] = Field(default=None, title="Manifest Hash Value", description="""The provided hash value from external users to be validated against internal hash values""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + file_hash_validation_status: Optional[str] = Field(default=None, title="File Hash Validation Status", description="""Notes whether hashes have been generated and verified against manifest hash values.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + file_type: str = Field(default=..., title="File Type", description="""An internal type or classification of the files based on its operational usuage.""", json_schema_extra = { "linkml_meta": 
{'domain_of': ['FileAdmin']} }) + encryption_status: str = Field(default=..., title="Encryption Status", description="""Indicates whether the object in AWS is encrypted and the type of encryption applied.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + is_multipart_uploaded: str = Field(default=..., title="Is Multipart Uploaded", description="""Indicates whether the object was uploaded using a multipart upload process.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + object_lock_level_hold_status: str = Field(default=..., title="Object Lock Level Hold Status", description="""Whether a legal hold is applied to prevent deletion of the object.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + object_lock_mode: str = Field(default=..., title="Object Lock Mode", description="""Retention mode applied to the object that restricts deletion or modification.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + object_lock_retain_until_date: datetime = Field(default=..., title="Object Lock Retain Until Date", description="""Specifies exact date and time when an object's Object Lock retention period expires.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + replication_status: str = Field(default=..., title="Replication Status", description="""Status of the object's replication to another storage location.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + version_id: str = Field(default=..., title="Version ID", description="""Identifier for a specific version of the object""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + staging_url: Optional[str] = Field(default=None, title="Staging Location", description="""URL for internal access to the data.
May be temporary.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAdmin']} }) + release_url: Optional[str] = Field(default=None, title="Release Location", description="""URL for controlled or open access to the data.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAdmin']} }) + hash: FileHash = Field(default=..., title="File Hash", description="""File hash information""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAdmin']} }) + access_type: EnumAccessType = Field(default=..., title="Access Type", description="""Notes whether a file is controlled, open, or registered-tier access""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin', 'FileAssay']} }) + access_url: Optional[str] = Field(default=None, title="Access URL", description="""HTTPS endpoint for accessing a file via a specific data repository service.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + drs_uri: Optional[str] = Field(default=None, title="DRS URI", description="""DRS location to access the data.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAdmin']} }) + acl: str = Field(default=..., title="ACL", description="""The object access control list.""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + is_released: bool = Field(default=..., title="Is Released", description="""A flag that notes whether a file has been released to the public""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + is_registered: bool = Field(default=..., title="Is Registered", description="""A flag that notes whether a file has been registered to a drs service""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + repository: Optional[EnumRepository] = Field(default=None, title="Repository", description="""The name of the drs service which files are registered to""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin']} }) + 
experimental_strategy: EnumExperimentalStrategy = Field(default=..., title="Experimental Strategy", description="""Method or assay used to generate the data""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin', 'FileAssay']} }) + + +class FileAssay(ConfiguredBaseModel): + """ + A file produced by or associated with an assay or data acquisition process including omics, imaging, actigraphy, and other experimental or observational data. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://includedcc.org/include-access-model', + 'slot_usage': {'data_category': {'name': 'data_category', 'required': True}, + 'file_id': {'identifier': True, + 'name': 'file_id', + 'required': True}, + 'sample_id': {'multivalued': True, + 'name': 'sample_id', + 'required': True}, + 'subject_id': {'multivalued': True, + 'name': 'subject_id', + 'required': True}}, + 'title': 'File Assay'}) + + file_id: str = Field(default=..., title="File ID", description="""Unique identifier for this File.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'Dataset', 'FileAdmin', 'FileAssay']} }) + subject_id: list[str] = Field(default=..., title="Study ID", description="""INCLUDE Global ID for the Subject""", json_schema_extra = { "linkml_meta": {'domain_of': ['Subject', + 'Demographics', + 'SubjectAssertion', + 'Encounter', + 'File', + 'FileAdmin', + 'FileAssay']} }) + sample_id: list[str] = Field(default=..., title="Sample ID", description="""The unique identifier for this Sample.""", json_schema_extra = { "linkml_meta": {'domain_of': ['Sample', 'Aliquot', 'File', 'FileAdmin', 'FileAssay']} }) + data_category: EnumDataCategory = Field(default=..., title="Data Category", description="""General category of data in this Record (e.g. 
Clinical, Genomics, etc)""", json_schema_extra = { "linkml_meta": {'domain_of': ['StudyMetadata', 'File', 'FileAssay']} }) + experimental_strategy: EnumExperimentalStrategy = Field(default=..., title="Experimental Strategy", description="""Method or assay used to generate the data""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin', 'FileAssay']} }) + data_type: EnumEDAMDataTypes = Field(default=..., title="Data Type", description="""The type of data within this file.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAssay']} }) + format: EnumEDAMFormats = Field(default=..., title="File Format", description="""The format of the file.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAssay']} }) + size: int = Field(default=..., title="File Size", description="""Size of the file, in Bytes.""", json_schema_extra = { "linkml_meta": {'domain_of': ['File', 'FileAdmin', 'FileAssay'], 'unit': {'ucum_code': 'By'}} }) + access_type: EnumAccessType = Field(default=..., title="Access Type", description="""Notes whether a file is controlled, open, or registered-tier access""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAdmin', 'FileAssay']} }) + assay_center: Optional[EnumAssayCenter] = Field(default=None, title="Assay Center", description="""The organization or center that generated the file""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAssay']} }) + platform: EnumPlatform = Field(default=..., title="Platform", description="""Instrument or platform family name""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAssay']} }) + workflow_name: Optional[str] = Field(default=None, title="Workflow Name", description="""Processing tool that produced the file""", json_schema_extra = { "linkml_meta": {'domain_of': ['FileAssay']} }) + workflow_version: Optional[str] = Field(default=None, title="Workflow Version", description="""Version of the process tool that produced the file""", 
json_schema_extra = { "linkml_meta": {'domain_of': ['FileAssay']} }) + + # Model rebuild # see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model Record.model_rebuild() @@ -880,3 +1057,5 @@ class Dataset(ConfiguredBaseModel): File.model_rebuild() FileHash.model_rebuild() Dataset.model_rebuild() +FileAdmin.model_rebuild() +FileAssay.model_rebuild() diff --git a/src/include_access_model/schema/include_access_model.yaml b/src/include_access_model/schema/include_access_model.yaml index 5d22101..27298c8 100644 --- a/src/include_access_model/schema/include_access_model.yaml +++ b/src/include_access_model/schema/include_access_model.yaml @@ -308,38 +308,50 @@ classes: identifier: true File: title: File - description: File + description: Required information for portal use. is_a: Record slots: - - file_id - - subject_id + - study_id + - file_id + - subject_id # do we need both a subject and sample id in this table? - sample_id + - s3_file_path # can name this url if more appropriate - filename + - size - format - - data_category + - data_category - data_type - format - size - #TODO: I'm not convinced this is the right strategy- access model vs operations +#TODO: I'm not convinced this is the right strategy- access model vs operations - staging_url - release_url - drs_uri - - hash + - hash + slot_usage: + study_id: + required: true file_id: range: string required: true identifier: true subject_id: + required: true multivalued: true sample_id: + required: true multivalued: true + data_category: + required: true + FileHash: title: File Hash description: Type and value of a file content hash. slots: - hash_type - hash_value + Dataset: title: Dataset description: Set of files grouped together for release. @@ -363,7 +375,95 @@ classes: multivalued: true description: The list of files comprising this dataset. 
+ FileAdmin: # names are TBD; can change - idea is this is operational or file universe model title: File Admin + description: File universe; contains all information about a file that may be needed for operational work + slots: + - study_id + - file_id + - subject_id # do we need both a subject and sample id in this table? + - sample_id + - s3_file_path # can name this url if more appropriate + - file_category + - size + - s3_key + - file_extension + - data_transfer_id + - aws_account_id + - account_name + - account_alias + - bucket_study_id + - bucket + - s3_created_at + - s3_modified_at + - intelligent_tiering_access_tier + - is_delete_marker + - is_latest + - storage_class + - manifest_hash_value + - file_hash_validation_status + - file_type + - encryption_status + - is_multipart_uploaded + - object_lock_level_hold_status + - object_lock_mode + - object_lock_retain_until_date + - replication_status + - version_id + - staging_url + - release_url + - hash + - access_type + - access_url + - drs_uri + - acl + - is_released + - is_registered + - repository + - experimental_strategy + + slot_usage: + study_id: + required: true + file_id: + required: true + identifier: true + FileAssay: + title: File Assay + # for now group all types into one table; but we may want to split out since different + # assay types collect different types of information + # this is a basic model + description: A file produced by or associated with an assay + or data acquisition process including omics, imaging, + actigraphy, and other experimental or observational data. 
+ slots: + - file_id + - subject_id + - sample_id + - data_category + - experimental_strategy + - data_type + - format + - size + - access_type + - assay_center + - platform + - workflow_name + - workflow_version + + slot_usage: + file_id: + required: true + identifier: true + subject_id: + required: true + multivalued: true + sample_id: + required: true + multivalued: true + data_category: + required: true + slots: study_id: title: Study ID @@ -744,44 +844,54 @@ slots: title: Filename description: The name of the file. range: string + required: true format: title: File Format description: The format of the file. range: EnumEDAMFormats + required: true data_type: title: Data Type description: The type of data within this file. range: EnumEDAMDataTypes + required: true size: title: File Size description: Size of the file, in Bytes. range: integer unit: - ucum_code: By + ucum_code: By + required: true staging_url: title: Staging Location description: URL for internal access to the data. May be temporary. range: uriorcurie + required: false release_url: title: Release Location description: URL for controlled or open access to the data. range: uriorcurie + required: false drs_uri: title: DRS URI description: DRS location to access the data. range: uriorcurie + required: false hash: title: File Hash description: File hash information range: FileHash + required: true hash_type: title: File Hash Type description: The type of file hash, eg, md5 range: EnumFileHashType + required: true hash_value: title: File Hash Value description: The value of the file hash range: string + required: true dataset_id: title: Dataset ID description: Unique identifier for a Dataset. @@ -796,6 +906,195 @@ slots: description: The date that data collection started. May include only a year. 
#TODO: We could re-evaluate these as dates, but that may be too implementation specific range: string + s3_file_path: + title: S3 File Path + description: The full s3 url of a file's location in aws + range: string + required: true + is_released: + title: Is Released + description: A flag that notes whether a file has been released to the public + range: boolean + required: true + is_registered: + title: Is Registered + description: A flag that notes whether a file has been registered to a drs service + range: boolean + required: true + repository: + title: Repository + description: The name of the drs service which files are registered to + range: EnumRepository + required: false + file_category: + title: File Category + description: A high level classification of the file used for operations. + range: string + required: true + s3_key: + title: S3 Key + description: The unique identifier for an object within a bucket + range: string + required: true + file_extension: + title: File Extension + description: A 3-4 letter code at the end of a filename that identifies the file format. + range: string + required: true + data_transfer_id: + title: Data Transfer ID + description: A jira ticket number associated with a file transfer request to production bucket + range: string + required: false + aws_account_id: + title: AWS Account ID + description: A 12-digit number that uniquely identifies a specific AWS account + range: string + required: true + account_name: + title: AWS Account Name + description: A user-defined label used to define an AWS account. 
+ range: string + required: true + account_alias: + title: Account Alias + description: A unique user-defined string that replaces the AWS Account ID in the IAM user sign-in URL + range: string + required: true + bucket_study_id: + title: Bucket Study ID + description: The global study ID used to create the bucket + range: string + required: true + bucket: + title: Bucket + description: Cloud storage container in AWS used to manage and store s3 objects + range: string + required: true + s3_created_at: + title: S3 Created At + description: Timestamp of when a file was uploaded to an s3 bucket. + range: datetime + required: true + s3_modified_at: + title: S3 Modified At + description: Timestamp of when a file was modified in an s3 bucket. + range: datetime + required: true + intelligent_tiering_access_tier: + title: Intelligent Tiering Access Tier + description: Storage access tier assigned by AWS intelligent tiering, indicating the current access frequency classification of the object + range: string + required: true + is_delete_marker: + title: Is Delete Marker + description: A flag that notes whether a file has been deleted from s3 + range: boolean + required: true + is_latest: + title: Is Latest + description: Specifies whether an object version is the most recent version of that object + range: boolean + required: true + storage_class: + title: Storage Class + description: Storage tier of the object in AWS reflecting cost and access characteristics. + range: string + required: true + manifest_hash_value: + title: Manifest Hash Value + description: The provided hash value from external users to be validated against internal hash values + range: string + required: false + file_hash_validation_status: + title: File Hash Validation Status + description: Notes whether hashes have been generated and verified against manifest hash values. 
+ range: string + required: false + file_type: + title: File Type + description: An internal type or classification of the files based on its operational usage. + range: string + required: true + encryption_status: + title: Encryption Status + description: Indicates whether the object in AWS is encrypted and the type of encryption applied. + range: string + required: true + is_multipart_uploaded: + title: Is Multipart Uploaded + description: Indicates whether the object was uploaded using a multipart upload process. + range: string + required: true + object_lock_level_hold_status: + title: Object Lock Level Hold Status + description: Whether a legal hold is applied to prevent deletion of the object. + range: string + required: true + object_lock_mode: + title: Object Lock Mode + description: Retention mode applied to the object that restricts deletion or modification. + range: string + required: true + replication_status: + title: Replication Status + description: Status of the object's replication to another storage location. + range: string + required: true + version_id: + title: Version ID + description: Identifier for a specific version of the object + range: string + required: true + access_type: + title: Access Type + description: Notes whether a file is controlled, open, or registered-tier access + range: EnumAccessType + required: true + access_url: + title: Access URL + description: HTTPS endpoint for accessing a file via a specific data repository service. + range: string + required: false + acl: + title: ACL + description: The object access control list. 
+ range: string + required: true + experimental_strategy: + title: Experimental Strategy + description: Method or assay used to generate the data + range: EnumExperimentalStrategy + required: true + assay_center: + title: Assay Center + description: The organization or center that generated the file + range: EnumAssayCenter + required: false + platform: + title: Platform + description: Instrument or platform family name + range: EnumPlatform + required: true + workflow_name: + title: Workflow Name + description: Processing tool that produced the file + range: string + required: false + workflow_version: + title: Workflow Version + description: Version of the process tool that produced the file + range: string + required: false + object_lock_retain_until_date: + title: Object Lock Retain Until Date + description: Specifies exact date and time when an object's Object Lock retention period expires. + range: datetime + required: true + + + + enums: EnumProgram: @@ -1137,5 +1436,84 @@ enums: title: ETag sha1: title: SHA-1 + EnumAccessType: + description: Types of file access levels. + permissible_values: + open: + title: Open Access + controlled: + title: Controlled Access + registered: + title: Registered-tier Access + EnumExperimentalStrategy: + description: Types of sequencing methods. + permissible_values: + wgs: + title: Whole Genome Sequencing + rnaseq: + title: RNA-Seq + wxs: + title: Whole Exome Sequencing + methlyation: + title: Methylation + clr_wgs: + title: Continuous Long Reads WGS + proteomics: + title: Proteomics + targeted_seq: + title: Targeted Sequencing + ccs_wgs: + title: Circular Consensus Sequencing WGS + panel: + title: Panel + ccs_rnaseq: + title: Circular Consensus Sequencing RNA-Seq + ont_wgs: + title: Oxford Nanopore Technologies WGS + clr_rnaseq: + title: Continuous Long Reads RNA-Seq + EnumAssayCenter: + description: Organizations or centers producing raw or harmonized sequencing files. 
+ permissible_values: + # can add more as needed + broad: + title: The Broad Institute + hudsonalpha: + title: HudsonAlpha Institute for Biotechnology + stjude: + title: St. Jude + baylor: + title: Baylor College of Medicine + chop: + title: The Children's Hospital of Philadelphia + other: + title: Other + unknown: + title: Unknown + EnumRepository: + description: specific drs service used for registration + permissible_values: + cavatica: + title: Cavatica DRS + dcf: + title: NCI DCF + other: + title: Other + EnumPlatform: + description: names of instrument or platforms used for assay data generation + permissible_values: + illumina: + title: Illumina + pacbio: + title: PacBio + ont: + title: ONT + illumina_epic: + title: Illumina Infinium HumanMethylationEPICv2 + other: + title: Other + unknown: + title: Unknown +