Convert a vector of class factor or
haven_labelled
to a "binary vector". A "binary vector" is a vector of class numeric that
takes only two values: 0 or 1. Another way of thinking about this is that
the function turns a variable into a dummy variable.
Details
make_binary() builds off make_dicho() and therefore was designed to work
on any vector that is of class factor,
haven_labelled,
or numeric with value labels. Because this was built off of make_dicho(),
if the vector is numeric with no value labels, the function will return an
error.
Similar to how make_dicho() provides the opportunity to flip the factor
levels, make_binary() allows you to flip which values should be recoded as
0 and which should be recoded as 1. To do so, just set flip_values = TRUE.
In addition, this function adds three new attributes. The first attribute,
transformation, indicates the data transformation that the original vector
underwent to create this new vector. The second attribute, label, contains
the variable label that was found in the original variable. However, if the
original vector did not have a variable label, then this attribute will not
show up. The third attribute, labels, adds value labels so you can see
what the 1 and 0 mean.
Examples
# attach the packages used throughout the examples
library(dplyr)
library(adlgraphs)
# build a six-row example data set, one column at a time
df <- tibble::tibble(
  x = c(3, 4, 2, 1, 5, 6),
  y = c(2, 4, 6, 1, 4, 5),
  z = c(3, 2, 1, 4, 3, 6)
)
# value labels for the six-point agreement scale
labs <- c(1, 2, 3, 4, 5, 6)
names(labs) <- c(
  "Strongly agree",
  "Agree",
  "Somewhat agree",
  "Somewhat disagree",
  "Disagree",
  "Strongly disagree"
)
# attach the same value labels to every column
attr(df$x, "labels") <- labs
attr(df$y, "labels") <- labs
attr(df$z, "labels") <- labs
# show the data transformation with a numeric vector that has value labels
# (`x` is a plain numeric column carrying a "labels" attribute)
# NOTE(review): in the printed result, the rows whose label is exactly
# "Agree" or "Disagree" come back as NA -- confirm this is intended
binary_df <- dplyr::mutate(df, binary_x = make_binary(x))
# check the updated dataset
binary_df
#> # A tibble: 6 × 4
#>       x     y     z binary_x
#>   <dbl> <dbl> <dbl>    <dbl>
#> 1     3     2     3        1
#> 2     4     4     2        0
#> 3     2     6     1       NA
#> 4     1     1     4        1
#> 5     5     4     3       NA
#> 6     6     5     6        0
# Check the attributes
attributes(binary_df$binary_x)
#> $transformation
#> [1] "Converting 'x' to a binary variable with 'Agree' = 1 and 'Disagree' = 0."
#> 
#> $labels
#>    Agree Disagree 
#>        1        0 
#> 
#> $label
#> [1] "x"
#> 
# another way of checking the attributes
str(binary_df$binary_x)
#>  num [1:6] 1 0 NA 1 NA 0
#>  - attr(*, "transformation")= chr "Converting 'x' to a binary variable with 'Agree' = 1 and 'Disagree' = 0."
#>  - attr(*, "labels")= Named num [1:2] 1 0
#>   ..- attr(*, "names")= chr [1:2] "Agree" "Disagree"
#>  - attr(*, "label")= chr "x"
# check the unique values (the result is numeric, so it has no factor levels)
unique(binary_df$binary_x)
#> [1]  1  0 NA
# ----------------------------------------------------------------------------
# function also works with factors
binary_df <- df %>%
  dplyr::mutate(
    # convert the labelled variable to a factor
    # (the printed factor values match the value labels attached to `x`)
    factor_x = make_factor(x),
    # convert the factor to a binary (0/1) numeric variable
    binary_x = make_binary(factor_x)
  )
# check the updated dataset
binary_df
#> # A tibble: 6 × 5
#>       x     y     z factor_x          binary_x
#>   <dbl> <dbl> <dbl> <fct>                <dbl>
#> 1     3     2     3 Somewhat agree           1
#> 2     4     4     2 Somewhat disagree        0
#> 3     2     6     1 Agree                   NA
#> 4     1     1     4 Strongly agree           1
#> 5     5     4     3 Disagree                NA
#> 6     6     5     6 Strongly disagree        0
# Check the attributes
attributes(binary_df$binary_x)
#> $transformation
#> [1] "Converting 'factor_x' to a binary variable with 'Agree' = 1 and 'Disagree' = 0."
#> 
#> $label
#> [1] "x"
#> 
#> $labels
#>    Agree Disagree 
#>        1        0 
#> 
# another way of checking the attributes
str(binary_df$binary_x)
#>  num [1:6] 1 0 NA 1 NA 0
#>  - attr(*, "transformation")= chr "Converting 'factor_x' to a binary variable with 'Agree' = 1 and 'Disagree' = 0."
#>  - attr(*, "label")= chr "x"
#>  - attr(*, "labels")= Named num [1:2] 1 0
#>   ..- attr(*, "names")= chr [1:2] "Agree" "Disagree"
# check the unique values (the result is numeric, so it has no factor levels)
unique(binary_df$binary_x)
#> [1]  1  0 NA
# ----------------------------------------------------------------------------
# function also works inside dplyr::across()
# Create new columns using `across()`
binary_df <- df %>%
  dplyr::mutate(
    # use this example if you don't want to flip the values
    dplyr::across(
      x:z,
      make_binary,
      .names = "binary_{col}"
    ),
    # if you want to flip the values, follow this example
    dplyr::across(
      c(x:z),
      # the `.x` placeholder is important to remember: inside the formula it
      # stands for the column currently being processed
      ~ make_binary(.x, flip_values = TRUE),
      .names = "binary_flipped_{col}"
    )
  )
# show that the function worked properly by creating two new sets of variables
binary_df
#> # A tibble: 6 × 9
#>       x     y     z binary_x binary_y binary_z binary_flipped_x binary_flipped_y
#>   <dbl> <dbl> <dbl>    <dbl>    <dbl>    <dbl>            <dbl>            <dbl>
#> 1     3     2     3        1       NA        1                0               NA
#> 2     4     4     2        0        0       NA                1                1
#> 3     2     6     1       NA        0        1               NA                1
#> 4     1     1     4        1        1        0                0                0
#> 5     5     4     3       NA        0        1               NA                1
#> 6     6     5     6        0       NA        0                1               NA
#> # ℹ 1 more variable: binary_flipped_z <dbl>
# show the underlying structure of the entire df
str(binary_df)
#> tibble [6 × 9] (S3: tbl_df/tbl/data.frame)
#>  $ x               : num [1:6] 3 4 2 1 5 6
#>   ..- attr(*, "labels")= Named num [1:6] 1 2 3 4 5 6
#>   .. ..- attr(*, "names")= chr [1:6] "Strongly agree" "Agree" "Somewhat agree" "Somewhat disagree" ...
#>  $ y               : num [1:6] 2 4 6 1 4 5
#>   ..- attr(*, "labels")= Named num [1:6] 1 2 3 4 5 6
#>   .. ..- attr(*, "names")= chr [1:6] "Strongly agree" "Agree" "Somewhat agree" "Somewhat disagree" ...
#>  $ z               : num [1:6] 3 2 1 4 3 6
#>   ..- attr(*, "labels")= Named num [1:6] 1 2 3 4 5 6
#>   .. ..- attr(*, "names")= chr [1:6] "Strongly agree" "Agree" "Somewhat agree" "Somewhat disagree" ...
#>  $ binary_x        : num [1:6] 1 0 NA 1 NA 0
#>   ..- attr(*, "transformation")= chr "Converting 'x' to a binary variable with 'Agree' = 1 and 'Disagree' = 0."
#>   ..- attr(*, "labels")= Named num [1:2] 1 0
#>   .. ..- attr(*, "names")= chr [1:2] "Agree" "Disagree"
#>   ..- attr(*, "label")= chr "x"
#>  $ binary_y        : num [1:6] NA 0 0 1 0 NA
#>   ..- attr(*, "transformation")= chr "Converting 'y' to a binary variable with 'Agree' = 1 and 'Disagree' = 0."
#>   ..- attr(*, "labels")= Named num [1:2] 1 0
#>   .. ..- attr(*, "names")= chr [1:2] "Agree" "Disagree"
#>   ..- attr(*, "label")= chr "y"
#>  $ binary_z        : num [1:6] 1 NA 1 0 1 0
#>   ..- attr(*, "transformation")= chr "Converting 'z' to a binary variable with 'Agree' = 1 and 'Disagree' = 0."
#>   ..- attr(*, "labels")= Named num [1:2] 1 0
#>   .. ..- attr(*, "names")= chr [1:2] "Agree" "Disagree"
#>   ..- attr(*, "label")= chr "z"
#>  $ binary_flipped_x: num [1:6] 0 1 NA 0 NA 1
#>   ..- attr(*, "transformation")= chr "Converting 'x' to a binary variable with 'Disagree' = 1 and 'Agree' = 0."
#>   ..- attr(*, "labels")= Named num [1:2] 1 0
#>   .. ..- attr(*, "names")= chr [1:2] "Disagree" "Agree"
#>   ..- attr(*, "label")= chr "x"
#>  $ binary_flipped_y: num [1:6] NA 1 1 0 1 NA
#>   ..- attr(*, "transformation")= chr "Converting 'y' to a binary variable with 'Disagree' = 1 and 'Agree' = 0."
#>   ..- attr(*, "labels")= Named num [1:2] 1 0
#>   .. ..- attr(*, "names")= chr [1:2] "Disagree" "Agree"
#>   ..- attr(*, "label")= chr "y"
#>  $ binary_flipped_z: num [1:6] 0 NA 0 1 0 1
#>   ..- attr(*, "transformation")= chr "Converting 'z' to a binary variable with 'Disagree' = 1 and 'Agree' = 0."
#>   ..- attr(*, "labels")= Named num [1:2] 1 0
#>   .. ..- attr(*, "names")= chr [1:2] "Disagree" "Agree"
#>   ..- attr(*, "label")= chr "z"
# show how the value labels are flipped when `flip_values = TRUE`
# the result is numeric rather than a factor, so inspect the "labels"
# attribute; levels() is always NULL for a numeric vector
attr(binary_df$binary_x, "labels")
#>    Agree Disagree 
#>        1        0 
attr(binary_df$binary_flipped_x, "labels")
#> Disagree    Agree 
#>        1        0 
