An R Validator for the validation of data against user-defined schemas.
This package is under active development and is subject to frequent change.
You can install the development version of RV like so:
# install.packages("pak")
pak::pak("LJ-Jenkins/RV")
library(RV)
df <- data.frame(x = 1:3, y = c(" a", "b ", " c"))
v <- Validator(
data = df,
schema = list(
type = "data.frame",
min_nrow = 1,
x = list(
type = "numeric",
max_val = 5
),
y = list(
type = "character",
apply = "function(x) trimws(x)",
nzchar = TRUE
)
)
)
# Specified data is transformed
v@data
#> x y
#> 1 1 a
#> 2 2 b
#> 3 3 c
# Overall validity
v@valid
#> [1] TRUE
# Structured errors property
v@errors
#> $type
#> NULL
#>
#> $min_nrow
#> NULL
#>
#> $x
#> $x$type
#> NULL
#>
#> $x$max_val
#> NULL
#>
#>
#> $y
#> $y$apply
#> NULL
#>
#> $y$type
#> NULL
#>
#> $y$nzchar
#> NULL
# Informative errors that reflect the perceived data structure
Validator(
data = list(1, a = "a", b = 10, x = -1),
schema = list(
type = "data.frame",
list(type = "character"),
a = list(min_nchar = 2),
b = list(min_length = 2, max_val = 5),
x = list(positive = TRUE)
),
error = TRUE
)
#> Error:
#> ! <RV::Validator> object is invalid:
#> - Data validation failed with the following errors:
#> ├─ type: Is not type `data.frame`.
#> ├─ [[1]]
#> │ └─ type: Is not type `character`.
#> ├─ a
#> │ └─ min_nchar: Char length(s) must be at least 2.
#> ├─ b
#> │ ├─ max_val: Value(s) must be at most 5.
#> │ └─ min_length: Length must be at least 2.
#> └─ x
#> └─ positive: Value(s) must be positive (or zero).
# Transformed data can be accessed during the validation
Validator(
data = list(a = 1, b = 1),
schema = list(
a = list(apply = "function(x) x + 1"),
b = list(apply = "function(x, .data, ...) if (.data[['a']] > 1) x + 1")
)
)@data
#> $a
#> [1] 2
#>
#> $b
#> [1] 2
# Extensible
s <- Schema(list(double_if_five_else_error = TRUE))
s@valid
#> [1] FALSE
s <- add_rule(
s,
name = "double_if_five_else_error",
validator_fn = function(field, schema_field, ...) {
if (schema_field) {
if (field != 5) {
list(error = "Does not equal 5.")
} else {
list(data = field * 2)
}
}
},
schema_fn = function(schema_field, ...) {
if (!isTRUE(schema_field) && !isFALSE(schema_field)) {
"Must be a boolean."
}
},
rule_type = "validate"
)
s@valid
#> [1] TRUE
v <- Validator(data = 5, schema = s)
v@valid
#> [1] TRUE
v@data
#> [1] 10
Validator(data = 1, schema = s, error = TRUE)
#> Error:
#> ! <RV::Validator> object is invalid:
#> - Data validation failed with the following errors:
#> └─ double_if_five_else_error: Does not equal 5.
RV provides three S7 classes: Registry, Schema, and Validator.
Registry defines rules and stores all built-in RV rule names and
definitions.
r <- Registry()
S7::prop_names(r)
#> [1] "rule_names" "control_rules" "transform_rules"
#> [4] "validate_rules" "str_to_fn_rules" "str_to_fn_converter"
#> [7] "type_names" "type_map" "coerce_names"
#> [10] "coerce_map" "schema_rules" "cross_rule_names"
#> [13] "cross_rules" "validator_rules"
Schema takes a user-defined nested list schema, validates the schema,
and reorders the schema according to the order defined in the
Registry. By default Schema creates a Registry if one is not
passed to the function.
s <- Schema(list(type = "integer", default = 1L))
s@schema
#> $default
#> [1] 1
#>
#> $type
#> [1] "integer"
S7::prop_names(s)
#> [1] "schema" "errors" "Registry" ".schema_cache"
#> [5] "error" "error_print_opts" "valid"
Validator takes data and a user-defined Schema, and applies each
Schema field against the data. It does this in three passes, first
applying ‘control’ rules, then ‘transform’ rules, then ‘validate’ rules.
A list given as a schema will be passed to Schema() on ingest.
v <- Validator(
data = list(a = 1, b = "Hello"),
schema = list(
a = list(
type = "numeric",
min_val = 0,
max_val = 5
),
b = list(
type = "character",
apply = "\\(x) paste(x, 'World!')"
),
c = list(
required = FALSE,
type = "data.frame"
),
d = list(
default = 10L
)
)
)
v@data
#> $a
#> [1] 1
#>
#> $b
#> [1] "Hello World!"
#>
#> $d
#> [1] 10
S7::prop_names(v)
#> [1] "data" "Schema" "errors" ".validator_cache"
#> [5] "error" "valid"
Because schemas are lists and data can be lists, data.frames, or atomic
vectors, RV can be used on a range of data formats once they are loaded into R.
yaml_schema <- yaml::yaml.load(
"
type: 'list'
a:
type: 'character'
b:
type: 'list'
a:
type: 'numeric'
b:
type: 'character'
min_nchar: 3
"
)
yaml_data <- yaml::yaml.load(
"
a: 1
b:
a: 1
b: 'Hi'
"
)
Validator(yaml_data, yaml_schema, error = TRUE)
#> Error:
#> ! <RV::Validator> object is invalid:
#> - Data validation failed with the following errors:
#> ├─ a
#> │ └─ type: Is not type `character`.
#> └─ b
#> └─ b
#> └─ min_nchar: Char length(s) must be at least 3.
json_schema <- jsonlite::fromJSON(
'{
"type": "list",
"a": {
"type": "numeric",
"min_length": 2
},
"b": {
"type": "list",
"a": {
"type": "numeric",
"max_val": 5
},
"b": {
"type": "character"
}
}
}'
)
json_data <- jsonlite::fromJSON(
'{
"a": 1,
"b": {
"a": 10,
"b": "Hi"
}
}'
)
Validator(json_data, json_schema, error = TRUE)
#> Error:
#> ! <RV::Validator> object is invalid:
#> - Data validation failed with the following errors:
#> ├─ a
#> │ └─ min_length: Length must be at least 2.
#> └─ b
#> └─ a
#> └─ max_val: Value(s) must be at most 5.
# rectangular data, from `readr` readme
# works for any data.frame data, e.g., sav, dta, xls, xlsx, csv, tsv, etc.
rect_schema <- list(
type = "data.frame",
chicken = list(type = "character", nzchar = TRUE),
sex = list(coerce = "factor", levels = c("rooster", "hen")),
eggs_laid = list(type = "integer", positive = TRUE),
motto = list(type = "character", nzchar = TRUE)
)
rect_data <- readr::read_csv(
readr::readr_example("chickens.csv"),
show_col_types = FALSE
)
Validator(rect_data, rect_schema, error = TRUE)
#> Error:
#> ! <RV::Validator> object is invalid:
#> - Data validation failed with the following errors:
#> └─ eggs_laid
#> └─ type: Is not type `integer`.
For detailed information on using RV, see the vignettes:
(In development)
RV was inspired by and modelled on Python’s Cerberus. Error printing in RV was modelled on lobstr’s tree function.
If you encounter a clear bug, please file an issue with a minimal reproducible example on GitHub.
Please note that this project is released with a Contributor Code of Conduct. By contributing to this project, you agree to abide by its terms.
