case_when() is a survey-aware version of dplyr::case_when() that
evaluates each formula case sequentially and uses the first match for each
element to determine the output value.
Use case_when() when creating an entirely new vector. When partially
updating an existing vector, replace_when() is a better choice — it
retains the original value wherever no case matches and inherits existing
value labels from the input automatically.
When any of .label, .value_labels, .factor, or .description are
supplied, output label metadata is written to @metadata after mutate().
When none of these arguments are used, the output is identical to
dplyr::case_when().
Usage
case_when(
...,
.default = NULL,
.unmatched = "default",
.ptype = NULL,
.size = NULL,
.label = NULL,
.value_labels = NULL,
.factor = FALSE,
.description = NULL
)
Arguments
- ...
<dynamic-dots> A sequence of two-sided formulas (condition ~ value). The left-hand side must be a logical vector. The right-hand side provides the replacement value. Cases are evaluated sequentially; the first matching case is used. NULL inputs are ignored.
- .default
The value used when all LHS conditions return FALSE or NA. If NULL (the default), unmatched rows receive NA.
- .unmatched
Handling of unmatched rows. "default" (the default) uses .default; "error" raises an error if any row is unmatched.
- .ptype
An optional prototype declaring the desired output type. Overrides the common type of the RHS inputs.
- .size
An optional size declaring the desired output length. Overrides the common size computed from the LHS inputs.
- .label
character(1) or NULL. Variable label stored in @metadata@variable_labels after mutate(). Cannot be combined with .factor = TRUE.
- .value_labels
Named vector or NULL. Value labels stored in @metadata@value_labels. Names are the label strings; values are the data values.
- .factor
logical(1). If TRUE, returns a factor. Levels are ordered by the RHS values in formula order, or by .value_labels names if supplied. Cannot be combined with .label.
- .description
character(1) or NULL. Plain-language description of how the variable was created. Stored in @metadata@transformations[[col]]$description after mutate().
Value
A vector, factor, or haven_labelled vector:
- No surveytidy args — same output as dplyr::case_when().
- .factor = TRUE — a factor with levels in RHS formula order.
- .label or .value_labels supplied — a haven_labelled vector.
See also
- dplyr::case_when() for the base implementation.
- replace_when() to partially update an existing vector; also inherits existing value labels from the input automatically.
- if_else() for the two-condition case.
- recode_values() for value-mapping with explicit from/to vectors.
Other recoding:
if_else(),
na_if(),
recode_values(),
replace_values(),
replace_when()
Examples
library(surveycore)
library(surveytidy)
# create the survey design
ns_wave1_svy <- as_survey_nonprob(
ns_wave1,
weights = weight
)
# basic case_when — identical to dplyr::case_when()
new <- ns_wave1_svy |>
mutate(
age_pid = case_when(
age < 30 & pid3 == 1 ~ "18-29 Democrats",
age < 30 & pid3 == 2 ~ "18-29 Republicans",
age < 30 & pid3 %in% c(3:4) ~ "18-29 Independents",
.default = "Everyone else"
)
) |>
select(age, pid3, age_pid)
# by default, no label metadata is attached; only the call is recorded under @transformations
new
#>
#> ── Survey Design ───────────────────────────────────────────────────────────────
#> <survey_nonprob> (non-probability) [experimental]
#> Sample size: 6422
#>
#> # A tibble: 6,422 × 3
#> age pid3 age_pid
#> <dbl> <dbl> <chr>
#> 1 37 1 Everyone else
#> 2 45 1 Everyone else
#> 3 24 1 18-29 Democrats
#> 4 26 3 18-29 Independents
#> 5 60 2 Everyone else
#> 6 55 1 Everyone else
#> 7 37 4 Everyone else
#> 8 46 2 Everyone else
#> 9 60 2 Everyone else
#> 10 32 1 Everyone else
#> # ℹ 6,412 more rows
#>
#> ℹ Design variables preserved but hidden: weight.
#> ℹ Use `print(x, full = TRUE)` to show all variables.
new@metadata
#> <surveycore::survey_metadata>
#> @ variable_labels :List of 3
#> .. $ pid3 : chr "3-category party ID"
#> .. $ age : chr "What is your age? Provided by LUCID. Response is an integer value 18 or ..."
#> .. $ weight: chr "Survey weight, continuous value from 0-5"
#> @ value_labels :List of 1
#> .. $ pid3: Named num [1:4] 1 2 3 4
#> .. ..- attr(*, "names")= chr [1:4] "Democrat" "Republican" "Independent" "Something else"
#> @ question_prefaces: Named list()
#> @ notes : list()
#> @ universe : list()
#> @ missing_codes : list()
#> @ sata : list()
#> @ transformations :List of 1
#> .. $ age_pid: chr "case_when(age < 30 & pid3 == 1 ~ \"18-29 Democrats\", age < 30 & \n pid3 == 2 ~ \"18-29 Republicans\", age <"| __truncated__
#> @ weighting_history: list()
# attach a variable label via .label
new <- ns_wave1_svy |>
mutate(
age_pid = case_when(
age < 30 & pid3 == 1 ~ "18-29 Democrats",
age < 30 & pid3 == 2 ~ "18-29 Republicans",
age < 30 & pid3 %in% c(3:4) ~ "18-29 Independents",
.default = "Everyone else",
.label = "Age and Partisanship"
)
) |>
select(age, pid3, age_pid)
new@metadata@variable_labels
#> $pid3
#> [1] "3-category party ID"
#>
#> $age
#> [1] "What is your age? Provided by LUCID. Response is an integer value 18 or ..."
#>
#> $weight
#> [1] "Survey weight, continuous value from 0-5"
#>
#> $age_pid
#> [1] "Age and Partisanship"
#>
# attach a plain-language description of the transformation
new <- ns_wave1_svy |>
mutate(
age_pid = case_when(
age < 30 & pid3 == 1 ~ "18-29 Democrats",
age < 30 & pid3 == 2 ~ "18-29 Republicans",
age < 30 & pid3 %in% c(3:4) ~ "18-29 Independents",
.default = "Everyone else",
.label = "Age and Partisanship",
.description = paste(
"Young (< 30) Democrats, Republicans, and Independents",
"were grouped by partisanship; everyone else was set to",
"'Everyone else'."
)
)
) |>
select(age, pid3, age_pid)
new@metadata@transformations
#> $age_pid
#> $age_pid$fn
#> [1] "case_when"
#>
#> $age_pid$source_cols
#> [1] "age" "pid3"
#>
#> $age_pid$expr
#> [1] "case_when(age < 30 & pid3 == 1 ~ \"18-29 Democrats\", age < 30 & "
#> [2] " pid3 == 2 ~ \"18-29 Republicans\", age < 30 & pid3 %in% c(3:4) ~ "
#> [3] " \"18-29 Independents\", .default = \"Everyone else\", .label = \"Age and Partisanship\", "
#> [4] " .description = paste(\"Young (< 30) Democrats, Republicans, and Independents\", "
#> [5] " \"were grouped by partisanship; everyone else was set to\", "
#> [6] " \"'Everyone else'.\"))"
#>
#> $age_pid$output_type
#> [1] "vector"
#>
#> $age_pid$description
#> [1] "Young (< 30) Democrats, Republicans, and Independents were grouped by partisanship; everyone else was set to 'Everyone else'."
#>
#>
# attach value labels alongside numeric codes
new <- ns_wave1_svy |>
mutate(
age_pid = case_when(
age < 30 & pid3 == 1 ~ 1,
age < 30 & pid3 == 2 ~ 2,
age < 30 & pid3 %in% c(3:4) ~ 3,
.default = 4,
.label = "Age and Partisanship",
.value_labels = c(
"18-29 Democrats" = 1,
"18-29 Republicans" = 2,
"18-29 Independents" = 3,
"Everyone else" = 4
)
)
) |>
select(age, pid3, gender, age_pid)
new@metadata@value_labels
#> $pid3
#> Democrat Republican Independent Something else
#> 1 2 3 4
#>
#> $gender
#> Female Male
#> 1 2
#>
#> $age_pid
#> 18-29 Democrats 18-29 Republicans 18-29 Independents Everyone else
#> 1 2 3 4
#>
# return a factor with levels in formula order
new <- ns_wave1_svy |>
mutate(
age_pid = case_when(
age < 30 & pid3 == 1 ~ "18-29 Democrats",
age < 30 & pid3 == 2 ~ "18-29 Republicans",
age < 30 & pid3 %in% c(3:4) ~ "18-29 Independents",
.default = "Everyone else",
.factor = TRUE
)
) |>
select(age, pid3, age_pid)
new
#>
#> ── Survey Design ───────────────────────────────────────────────────────────────
#> <survey_nonprob> (non-probability) [experimental]
#> Sample size: 6422
#>
#> # A tibble: 6,422 × 3
#> age pid3 age_pid
#> <dbl> <dbl> <fct>
#> 1 37 1 Everyone else
#> 2 45 1 Everyone else
#> 3 24 1 18-29 Democrats
#> 4 26 3 18-29 Independents
#> 5 60 2 Everyone else
#> 6 55 1 Everyone else
#> 7 37 4 Everyone else
#> 8 46 2 Everyone else
#> 9 60 2 Everyone else
#> 10 32 1 Everyone else
#> # ℹ 6,412 more rows
#>
#> ℹ Design variables preserved but hidden: weight.
#> ℹ Use `print(x, full = TRUE)` to show all variables.
