case_when() is a survey-aware version of dplyr::case_when() that
evaluates each formula case sequentially and uses the first match for each
element to determine the output value.
Use case_when() when creating an entirely new vector. When partially
updating an existing vector, replace_when() is a better choice — it
retains the original value wherever no case matches and inherits existing
value labels from the input automatically.
When any of .label, .value_labels, .factor, or .description are
supplied, output label metadata is written to @metadata after mutate().
When none of these arguments are used, the output is identical to
dplyr::case_when().
Usage
case_when(
...,
.default = NULL,
.unmatched = "default",
.ptype = NULL,
.size = NULL,
.label = NULL,
.value_labels = NULL,
.factor = FALSE,
.description = NULL
)
Arguments
- ...
<dynamic-dots> A sequence of two-sided formulas (condition ~ value). The left-hand side must be a logical vector. The right-hand side provides the replacement value. Cases are evaluated sequentially; the first matching case is used. NULL inputs are ignored.
- .default
The value used when all LHS conditions return FALSE or NA. If NULL (the default), unmatched rows receive NA.
- .unmatched
Handling of unmatched rows. "default" (the default) uses .default; "error" raises an error if any row is unmatched.
- .ptype
An optional prototype declaring the desired output type. Overrides the common type of the RHS inputs.
- .size
An optional size declaring the desired output length. Overrides the common size computed from the LHS inputs.
- .label
character(1) or NULL. Variable label stored in @metadata@variable_labels after mutate(). Cannot be combined with .factor = TRUE.
- .value_labels
Named vector or NULL. Value labels stored in @metadata@value_labels. Names are the label strings; values are the data values.
- .factor
logical(1). If TRUE, returns a factor. Levels are ordered by the RHS values in formula order, or by .value_labels names if supplied. Cannot be combined with .label.
- .description
character(1) or NULL. Plain-language description of how the variable was created. Stored in @metadata@transformations[[col]]$description after mutate().
Value
A vector, factor, or haven_labelled vector:
- No surveytidy args — same output as dplyr::case_when().
- .factor = TRUE — a factor with levels in RHS formula order.
- .label or .value_labels supplied — a haven_labelled vector.
See also
- dplyr::case_when() for the base implementation.
- replace_when() to partially update an existing vector; also inherits existing value labels from the input automatically.
- if_else() for the two-condition case.
- recode_values() for value-mapping with explicit from/to vectors.
Other recoding:
if_else(),
na_if(),
recode_values(),
replace_values(),
replace_when()
Examples
# load the libraries
library(surveycore)
library(surveytidy)
# create the survey design
ns_wave1_svy <- as_survey_nonprob(
ns_wave1,
weights = weight
)
# ---------------------------------------------------------------------
# Basic case_when — identical to dplyr::case_when() -------------------
# ---------------------------------------------------------------------
new <- ns_wave1_svy |>
mutate(
# make a new variable for young partisans
age_pid = case_when(
age < 30 & pid3 == 1 ~ "18-29 Democrats",
age < 30 & pid3 == 2 ~ "18-29 Republicans",
age < 30 & pid3 %in% c(3:4) ~ "18-29 Independents",
.default = "Everyone else"
)
) |>
# keep only the relevant columns
select(age, pid3, age_pid)
# show the new column
new
#>
#> ── Survey Design ───────────────────────────────────────────────────────────────
#> <survey_nonprob> (calibrated / non-probability) [experimental]
#> Sample size: 6422
#>
#> # A tibble: 6,422 × 3
#> age pid3 age_pid
#> <dbl> <dbl> <chr>
#> 1 37 1 Everyone else
#> 2 45 1 Everyone else
#> 3 24 1 18-29 Democrats
#> 4 26 3 18-29 Independents
#> 5 60 2 Everyone else
#> 6 55 1 Everyone else
#> 7 37 4 Everyone else
#> 8 46 2 Everyone else
#> 9 60 2 Everyone else
#> 10 32 1 Everyone else
#> # ℹ 6,412 more rows
#>
#> ℹ Design variables preserved but hidden: weight.
#> ℹ Use `print(x, full = TRUE)` to show all variables.
# By default, no label metadata is attached; only the transformation is recorded
new@metadata
#> <surveycore::survey_metadata>
#> @ variable_labels :List of 3
#> .. $ pid3 : chr "3-category party ID"
#> .. $ age : chr "What is your age? Provided by LUCID. Response is an integer value 18 or ..."
#> .. $ weight: chr "Survey weight, continuous value from 0-5"
#> @ value_labels :List of 1
#> .. $ pid3: Named num [1:4] 1 2 3 4
#> .. ..- attr(*, "names")= chr [1:4] "Democrat" "Republican" "Independent" "Something else"
#> @ question_prefaces: Named list()
#> @ notes : list()
#> @ universe : list()
#> @ missing_codes : list()
#> @ transformations :List of 1
#> .. $ age_pid: chr "case_when(age < 30 & pid3 == 1 ~ \"18-29 Democrats\", age < 30 & \n pid3 == 2 ~ \"18-29 Republicans\", age <"| __truncated__
#> @ weighting_history: list()
# --------------------------------------------------------------------
# Set metadata -------------------------------------------------------
# --------------------------------------------------------------------
# ---- Variable label ----
new <- ns_wave1_svy |>
mutate(
# make a new variable for young partisans
age_pid = case_when(
age < 30 & pid3 == 1 ~ "18-29 Democrats",
age < 30 & pid3 == 2 ~ "18-29 Republicans",
age < 30 & pid3 %in% c(3:4) ~ "18-29 Independents",
.default = "Everyone else",
# set variable label
.label = "Age and Partisanship"
)
) |>
# show the output of the new column relative to original columns
select(age, pid3, age_pid)
# Show variable labels; we can see that age_pid now has the label we set
new@metadata@variable_labels
#> $pid3
#> [1] "3-category party ID"
#>
#> $age
#> [1] "What is your age? Provided by LUCID. Response is an integer value 18 or ..."
#>
#> $weight
#> [1] "Survey weight, continuous value from 0-5"
#>
#> $age_pid
#> [1] "Age and Partisanship"
#>
# ---- Transformation ----
# set the plain word description of how the variable was created
new <- ns_wave1_svy |>
mutate(
# make a new variable for young partisans
age_pid = case_when(
age < 30 & pid3 == 1 ~ "18-29 Democrats",
age < 30 & pid3 == 2 ~ "18-29 Republicans",
age < 30 & pid3 %in% c(3:4) ~ "18-29 Independents",
.default = "Everyone else",
# set variable label
.label = "Age and Partisanship",
# set the description of the transformation
.description = "Those with age < 30 AND pid3 = 1 were set to '18-29 Democrats',\n
those with age < 30 AND pid3 = 2 were set to '18-29 Republicans', \n
those with age < 30 AND pid3 = 3 or 4 were set to '18-29 Independents', \n
everyone else was set to 'Everyone else'"
)
) |>
# show the output of the new column relative to original columns
select(age, pid3, age_pid)
# Show the transformations; we can see the description recorded for age_pid
new@metadata@transformations
#> $age_pid
#> $age_pid$fn
#> [1] "case_when"
#>
#> $age_pid$source_cols
#> [1] "age" "pid3"
#>
#> $age_pid$expr
#> [1] "case_when(age < 30 & pid3 == 1 ~ \"18-29 Democrats\", age < 30 & "
#> [2] " pid3 == 2 ~ \"18-29 Republicans\", age < 30 & pid3 %in% c(3:4) ~ "
#> [3] " \"18-29 Independents\", .default = \"Everyone else\", .label = \"Age and Partisanship\", "
#> [4] " .description = \"Those with age < 30 AND pid3 = 1 were set to '18-29 Democrats',\\n\\n those with age < 30 AND pid3 = 2 were set to '18-29 Republicans', \\n\\n those with age < 30 AND pid3 = 3 or 4 were set to '18-29 Independents', \\n\\n everyone else was set to 'Everyone else'\")"
#>
#> $age_pid$output_type
#> [1] "vector"
#>
#> $age_pid$description
#> [1] "Those with age < 30 AND pid3 = 1 were set to '18-29 Democrats',\n\n those with age < 30 AND pid3 = 2 were set to '18-29 Republicans', \n\n those with age < 30 AND pid3 = 3 or 4 were set to '18-29 Independents', \n\n everyone else was set to 'Everyone else'"
#>
#>
# ---- Value labels ----
# Add value labels
new <- ns_wave1_svy |>
mutate(
age_pid = case_when(
# set party for 18-29
age < 30 & pid3 == 1 ~ 1,
age < 30 & pid3 == 2 ~ 2,
age < 30 & pid3 %in% c(3:4) ~ 3,
.default = 4,
# add variable label
.label = "Age and Partisanship",
# add value labels
.value_labels = c(
"18-29 Democrats" = 1,
"18-29 Republicans" = 2,
"18-29 Independents" = 3,
"Everyone else" = 4
)
)
) |>
select(age, pid3, gender, age_pid)
new@metadata@value_labels
#> $pid3
#> Democrat Republican Independent Something else
#> 1 2 3 4
#>
#> $gender
#> Female Male
#> 1 2
#>
#> $age_pid
#> 18-29 Democrats 18-29 Republicans 18-29 Independents Everyone else
#> 1 2 3 4
#>
# --------------------------------------------------------------------
# Make output a factor -----------------------------------------------
# --------------------------------------------------------------------
new <- ns_wave1_svy |>
mutate(
# make a new variable for young partisans
age_pid = case_when(
age < 30 & pid3 == 1 ~ "18-29 Democrats",
age < 30 & pid3 == 2 ~ "18-29 Republicans",
age < 30 & pid3 %in% c(3:4) ~ "18-29 Independents",
.default = "Everyone else",
# make output a factor based on its appearance
.factor = TRUE
)
) |>
# show the output of the new column relative to original columns
select(age, pid3, age_pid)
new
#>
#> ── Survey Design ───────────────────────────────────────────────────────────────
#> <survey_nonprob> (calibrated / non-probability) [experimental]
#> Sample size: 6422
#>
#> # A tibble: 6,422 × 3
#> age pid3 age_pid
#> <dbl> <dbl> <fct>
#> 1 37 1 Everyone else
#> 2 45 1 Everyone else
#> 3 24 1 18-29 Democrats
#> 4 26 3 18-29 Independents
#> 5 60 2 Everyone else
#> 6 55 1 Everyone else
#> 7 37 4 Everyone else
#> 8 46 2 Everyone else
#> 9 60 2 Everyone else
#> 10 32 1 Everyone else
#> # ℹ 6,412 more rows
#>
#> ℹ Design variables preserved but hidden: weight.
#> ℹ Use `print(x, full = TRUE)` to show all variables.
