Skip to contents

case_when() is a survey-aware version of dplyr::case_when() that evaluates each formula case sequentially and uses the first match for each element to determine the output value.

Use case_when() when creating an entirely new vector. When partially updating an existing vector, replace_when() is a better choice — it retains the original value wherever no case matches and inherits existing value labels from the input automatically.

When any of .label, .value_labels, .factor, or .description are supplied, output label metadata is written to @metadata after mutate(). When none of these arguments are used, the output is identical to dplyr::case_when().

Usage

case_when(
  ...,
  .default = NULL,
  .unmatched = "default",
  .ptype = NULL,
  .size = NULL,
  .label = NULL,
  .value_labels = NULL,
  .factor = FALSE,
  .description = NULL
)

Arguments

...

<dynamic-dots> A sequence of two-sided formulas (condition ~ value). The left-hand side must be a logical vector. The right-hand side provides the replacement value. Cases are evaluated sequentially; the first matching case is used. NULL inputs are ignored.

.default

The value used when all LHS conditions return FALSE or NA. If NULL (the default), unmatched rows receive NA.

.unmatched

Handling of unmatched rows. "default" (the default) uses .default; "error" raises an error if any row is unmatched.

.ptype

An optional prototype declaring the desired output type. Overrides the common type of the RHS inputs.

.size

An optional size declaring the desired output length. Overrides the common size computed from the LHS inputs.

.label

character(1) or NULL. Variable label stored in @metadata@variable_labels after mutate(). Cannot be combined with .factor = TRUE.

.value_labels

Named vector or NULL. Value labels stored in @metadata@value_labels. Names are the label strings; values are the data values.

.factor

logical(1). If TRUE, returns a factor. Levels are ordered by the RHS values in formula order, or by .value_labels names if supplied. Cannot be combined with .label.

.description

character(1) or NULL. Plain-language description of how the variable was created. Stored in @metadata@transformations[[col]]$description after mutate().

Value

A vector, factor, or haven_labelled vector:

  • No surveytidy args — same output as dplyr::case_when().

  • .factor = TRUE — a factor with levels in RHS formula order.

  • .label or .value_labels supplied — a haven_labelled vector.

See also

Other recoding: if_else(), na_if(), recode_values(), replace_values(), replace_when()

Examples


# load the libraries
library(surveycore)
library(surveytidy)

# create the survey design
ns_wave1_svy <- as_survey_nonprob(
  ns_wave1,
  weights = weight
)

# ---------------------------------------------------------------------
# Basic case_when — identical to dplyr::case_when() -------------------
# ---------------------------------------------------------------------

new <- ns_wave1_svy |>
  mutate(
    # make a new variable for young partisans
    age_pid = case_when(
      age < 30 & pid3 == 1 ~ "18-29 Democrats",
      age < 30 & pid3 == 2 ~ "18-29 Republicans",
      age < 30 & pid3 %in% c(3:4) ~ "18-29 Independents",
      .default = "Everyone else"
    )
  ) |>
  # keep only the relevant columns
  select(age, pid3, age_pid)

# show the new column
new
#> 
#> ── Survey Design ───────────────────────────────────────────────────────────────
#> <survey_nonprob> (calibrated / non-probability) [experimental]
#> Sample size: 6422
#> 
#> # A tibble: 6,422 × 3
#>      age  pid3 age_pid           
#>    <dbl> <dbl> <chr>             
#>  1    37     1 Everyone else     
#>  2    45     1 Everyone else     
#>  3    24     1 18-29 Democrats   
#>  4    26     3 18-29 Independents
#>  5    60     2 Everyone else     
#>  6    55     1 Everyone else     
#>  7    37     4 Everyone else     
#>  8    46     2 Everyone else     
#>  9    60     2 Everyone else     
#> 10    32     1 Everyone else     
#> # ℹ 6,412 more rows
#> 
#>  Design variables preserved but hidden: weight.
#>  Use `print(x, full = TRUE)` to show all variables.

# By default, no label metadata is attached to age_pid; only the
# transformation expression is recorded
new@metadata
#> <surveycore::survey_metadata>
#>  @ variable_labels  :List of 3
#>  .. $ pid3  : chr "3-category party ID"
#>  .. $ age   : chr "What is your age? Provided by LUCID. Response is an integer value 18 or ..."
#>  .. $ weight: chr "Survey weight, continuous value from 0-5"
#>  @ value_labels     :List of 1
#>  .. $ pid3: Named num [1:4] 1 2 3 4
#>  ..  ..- attr(*, "names")= chr [1:4] "Democrat" "Republican" "Independent" "Something else"
#>  @ question_prefaces: Named list()
#>  @ notes            : list()
#>  @ universe         : list()
#>  @ missing_codes    : list()
#>  @ transformations  :List of 1
#>  .. $ age_pid: chr "case_when(age < 30 & pid3 == 1 ~ \"18-29 Democrats\", age < 30 & \n    pid3 == 2 ~ \"18-29 Republicans\", age <"| __truncated__
#>  @ weighting_history: list()


# --------------------------------------------------------------------
# Set metadata -------------------------------------------------------
# --------------------------------------------------------------------

# ---- Variable label ----
new <- ns_wave1_svy |>
  mutate(
    # make a new variable for young partisans
    age_pid = case_when(
      age < 30 & pid3 == 1 ~ "18-29 Democrats",
      age < 30 & pid3 == 2 ~ "18-29 Republicans",
      age < 30 & pid3 %in% c(3:4) ~ "18-29 Independents",
      .default = "Everyone else",
      # set variable label
      .label = "Age and Partisanship"
    )
  ) |>
  # show the output of the new column relative to the original columns
  select(age, pid3, age_pid)

# Show variable labels; age_pid now carries the label set via .label
new@metadata@variable_labels
#> $pid3
#> [1] "3-category party ID"
#> 
#> $age
#> [1] "What is your age? Provided by LUCID. Response is an integer value 18 or ..."
#> 
#> $weight
#> [1] "Survey weight, continuous value from 0-5"
#> 
#> $age_pid
#> [1] "Age and Partisanship"
#> 

# ---- Transformation ----

# set the plain-language description of how the variable was created
new <- ns_wave1_svy |>
  mutate(
    # make a new variable for young partisans
    age_pid = case_when(
      age < 30 & pid3 == 1 ~ "18-29 Democrats",
      age < 30 & pid3 == 2 ~ "18-29 Republicans",
      age < 30 & pid3 %in% c(3:4) ~ "18-29 Independents",
      .default = "Everyone else",
      # set variable label
      .label = "Age and Partisanship",
      # set the description of the transformation
      .description = "Those with age < 30 AND pid3 = 1 were set to '18-29 Democrats',\n
         those with age < 30 AND pid3 = 2 were set to '18-29 Republicans', \n
         those with age < 30 AND pid3 = 3 or 4 were set to '18-29 Independents', \n
         everyone else was set to 'Everyone else'"
    )
  ) |>
  # show the output of the new column relative to the original columns
  select(age, pid3, age_pid)

# Show transformations; age_pid now records the description set via .description
new@metadata@transformations
#> $age_pid
#> $age_pid$fn
#> [1] "case_when"
#> 
#> $age_pid$source_cols
#> [1] "age"  "pid3"
#> 
#> $age_pid$expr
#> [1] "case_when(age < 30 & pid3 == 1 ~ \"18-29 Democrats\", age < 30 & "                                                                                                                                                                                                                                                     
#> [2] "    pid3 == 2 ~ \"18-29 Republicans\", age < 30 & pid3 %in% c(3:4) ~ "                                                                                                                                                                                                                                                 
#> [3] "    \"18-29 Independents\", .default = \"Everyone else\", .label = \"Age and Partisanship\", "                                                                                                                                                                                                                         
#> [4] "    .description = \"Those with age < 30 AND pid3 = 1 were set to '18-29 Democrats',\\n\\n         those with age < 30 AND pid3 = 2 were set to '18-29 Republicans', \\n\\n         those with age < 30 AND pid3 = 3 or 4 were set to '18-29 Independents', \\n\\n         everyone else was set to 'Everyone else'\")"
#> 
#> $age_pid$output_type
#> [1] "vector"
#> 
#> $age_pid$description
#> [1] "Those with age < 30 AND pid3 = 1 were set to '18-29 Democrats',\n\n         those with age < 30 AND pid3 = 2 were set to '18-29 Republicans', \n\n         those with age < 30 AND pid3 = 3 or 4 were set to '18-29 Independents', \n\n         everyone else was set to 'Everyone else'"
#> 
#> 

# ---- Value labels ----

# Add value labels
new <- ns_wave1_svy |>
  mutate(
    age_pid = case_when(
      # set party for 18-29
      age < 30 & pid3 == 1 ~ 1,
      age < 30 & pid3 == 2 ~ 2,
      age < 30 & pid3 %in% c(3:4) ~ 3,
      .default = 4,
      # add variable label
      .label = "Age and Partisanship",
      # add value labels
      .value_labels = c(
        "18-29 Democrats" = 1,
        "18-29 Republicans" = 2,
        "18-29 Independents" = 3,
        "Everyone else" = 4
      )
    )
  ) |>
  select(age, pid3, gender, age_pid)

new@metadata@value_labels
#> $pid3
#>       Democrat     Republican    Independent Something else 
#>              1              2              3              4 
#> 
#> $gender
#> Female   Male 
#>      1      2 
#> 
#> $age_pid
#>    18-29 Democrats  18-29 Republicans 18-29 Independents      Everyone else 
#>                  1                  2                  3                  4 
#> 

# --------------------------------------------------------------------
# Make output a factor -----------------------------------------------
# --------------------------------------------------------------------

new <- ns_wave1_svy |>
  mutate(
    # make a new variable for young partisans
    age_pid = case_when(
      age < 30 & pid3 == 1 ~ "18-29 Democrats",
      age < 30 & pid3 == 2 ~ "18-29 Republicans",
      age < 30 & pid3 %in% c(3:4) ~ "18-29 Independents",
      .default = "Everyone else",
      # make the output a factor, with levels in RHS formula order
      .factor = TRUE
    )
  ) |>
  # show the output of the new column relative to the original columns
  select(age, pid3, age_pid)

new
#> 
#> ── Survey Design ───────────────────────────────────────────────────────────────
#> <survey_nonprob> (calibrated / non-probability) [experimental]
#> Sample size: 6422
#> 
#> # A tibble: 6,422 × 3
#>      age  pid3 age_pid           
#>    <dbl> <dbl> <fct>             
#>  1    37     1 Everyone else     
#>  2    45     1 Everyone else     
#>  3    24     1 18-29 Democrats   
#>  4    26     3 18-29 Independents
#>  5    60     2 Everyone else     
#>  6    55     1 Everyone else     
#>  7    37     4 Everyone else     
#>  8    46     2 Everyone else     
#>  9    60     2 Everyone else     
#> 10    32     1 Everyone else     
#> # ℹ 6,412 more rows
#> 
#>  Design variables preserved but hidden: weight.
#>  Use `print(x, full = TRUE)` to show all variables.