Convert a vector of class `factor` or `haven_labelled` to a "binary vector".
A "binary vector" here means a vector of class `numeric` that takes only two
values: 0 or 1. Another way of thinking about this is that it turns a variable
into a dummy variable.
Details
`make_binary()` builds off `make_dicho()` and therefore was designed to work
on any vector that is of class `factor`, `haven_labelled`, or `numeric` with
value labels. Because it is built on `make_dicho()`, the function will return
an error if the vector is numeric with no value labels.
Similar to how `make_dicho()` provides the opportunity to flip the factor
levels, `make_binary()` allows you to flip which values should be recoded as
0 and which should be recoded as 1. To do so, just set `flip_values = TRUE`.
In addition, this function adds three new attributes. The first attribute,
`transformation`, indicates the data transformation that the original vector
underwent to create this new vector. The second attribute, `label`, contains
the variable label that was found in the original variable; if the original
vector did not have a variable label, this attribute is omitted. The third
attribute, `labels`, adds value labels so you can see what the 1 and 0 mean.
Examples
library(tibble)
library(dplyr)
library(labelled)
library(haven)
# Build a small example data set: three 6-point agreement items (x, y, z),
# each stored as a double column with six rows.
df <- tibble::tibble(
  x = c(3, 4, 2, 1, 5, 6),
  y = c(2, 4, 6, 1, 4, 5),
  z = c(3, 2, 1, 4, 3, 6)
) %>%
  # attach the same six value labels to every item
  labelled::set_value_labels(
    x = c(
      "Strongly agree" = 1,
      "Agree" = 2,
      "Somewhat agree" = 3,
      "Somewhat disagree" = 4,
      "Disagree" = 5,
      "Strongly disagree" = 6
    ),
    y = c(
      "Strongly agree" = 1,
      "Agree" = 2,
      "Somewhat agree" = 3,
      "Somewhat disagree" = 4,
      "Disagree" = 5,
      "Strongly disagree" = 6
    ),
    z = c(
      "Strongly agree" = 1,
      "Agree" = 2,
      "Somewhat agree" = 3,
      "Somewhat disagree" = 4,
      "Disagree" = 5,
      "Strongly disagree" = 6
    )
  ) %>%
  # give each item its own variable label
  labelled::set_variable_labels(
    x = "This is the variable label for x",
    y = "This is the variable label for y",
    z = "This is the variable label for z"
  )
# Example 1: apply make_binary() to a haven_labelled column.
# show the data transformation with a haven_labelled vector
binary_df <- df %>% dplyr::mutate(binary_x = make_binary(x))
# check the updated dataset: binary_x is a plain <dbl> column of 1s and 0s
binary_df
#> # A tibble: 6 × 4
#> x y z binary_x
#> <dbl+lbl> <dbl+lbl> <dbl+lbl> <dbl>
#> 1 3 [Somewhat agree] 2 [Agree] 3 [Somewhat agree] 1
#> 2 4 [Somewhat disagree] 4 [Somewhat disagree] 2 [Agree] 0
#> 3 2 [Agree] 6 [Strongly disagree] 1 [Strongly agree] 1
#> 4 1 [Strongly agree] 1 [Strongly agree] 4 [Somewhat disagree] 1
#> 5 5 [Disagree] 4 [Somewhat disagree] 3 [Somewhat agree] 0
#> 6 6 [Strongly disagree] 5 [Disagree] 6 [Strongly disagree] 0
# Check the attributes added to the new vector
# (transformation, label, and labels)
attributes(binary_df$binary_x)
#> $transformation
#> Converting 'x' to a binary variable with 'Agree' = 1 and 'Disagree' = 0.
#>
#> $label
#> [1] "This is the variable label for x"
#>
#> $labels
#> Agree Disagree
#> 1 0
#>
# another way of checking the attributes
str(binary_df$binary_x)
#> num [1:6] 1 0 1 1 0 0
#> - attr(*, "transformation")= 'glue' chr "Converting 'x' to a binary variable with 'Agree' = 1 and 'Disagree' = 0."
#> - attr(*, "label")= chr "This is the variable label for x"
#> - attr(*, "labels")= Named num [1:2] 1 0
#> ..- attr(*, "names")= chr [1:2] "Agree" "Disagree"
# check the distinct values in the new vector (it is numeric, not a factor,
# so it has values rather than factor levels)
unique(binary_df$binary_x)
#> [1] 1 0
# ----------------------------------------------------------------------------
# Example 2: apply make_binary() to a factor column.
# function also works with factors
binary_df <- df %>%
dplyr::mutate(
# convert variable to a factor
factor_x = make_factor(x),
# convert the factor to a binary variable
binary_x = make_binary(factor_x)
)
# check the updated dataset: factor_x is <fct>, binary_x is <dbl>
binary_df
#> # A tibble: 6 × 5
#> x y z factor_x binary_x
#> <dbl+lbl> <dbl+lbl> <dbl+lbl> <fct> <dbl>
#> 1 3 [Somewhat agree] 2 [Agree] 3 [Somewhat agr… Somewha… 1
#> 2 4 [Somewhat disagree] 4 [Somewhat disagree] 2 [Agree] Somewha… 0
#> 3 2 [Agree] 6 [Strongly disagree] 1 [Strongly agr… Agree 1
#> 4 1 [Strongly agree] 1 [Strongly agree] 4 [Somewhat dis… Strongl… 1
#> 5 5 [Disagree] 4 [Somewhat disagree] 3 [Somewhat agr… Disagree 0
#> 6 6 [Strongly disagree] 5 [Disagree] 6 [Strongly dis… Strongl… 0
# Check the attributes; the transformation text names 'factor_x', the column
# that was passed to make_binary()
attributes(binary_df$binary_x)
#> $transformation
#> Converting 'factor_x' to a binary variable with 'Agree' = 1 and 'Disagree' = 0.
#>
#> $label
#> [1] "This is the variable label for x"
#>
#> $labels
#> Agree Disagree
#> 1 0
#>
# another way of checking the attributes
str(binary_df$binary_x)
#> num [1:6] 1 0 1 1 0 0
#> - attr(*, "transformation")= 'glue' chr "Converting 'factor_x' to a binary variable with 'Agree' = 1 and 'Disagree' = 0."
#> - attr(*, "label")= chr "This is the variable label for x"
#> - attr(*, "labels")= Named num [1:2] 1 0
#> ..- attr(*, "names")= chr [1:2] "Agree" "Disagree"
# check the distinct values in the new vector (it is numeric, not a factor,
# so it has values rather than factor levels)
unique(binary_df$binary_x)
#> [1] 1 0
# ----------------------------------------------------------------------------
# Example 3: apply make_binary() to several columns at once.
# function also works inside dplyr::across()
# Create new columns using `across()`
binary_df <- df %>%
dplyr::mutate(
# use this example if you don't want to flip which values are coded 1 and 0
dplyr::across(
x:z,
make_binary,
.names = "binary_{col}"
),
# if you want to flip which values are coded 1 and 0, follow this example
# (note the argument is `flip_values`)
dplyr::across(
x:z,
~make_binary(., flip_values = TRUE),
.names = "binary_flipped_{col}"
)
)
# show that the function worked properly by creating two new sets of variables
binary_df
#> # A tibble: 6 × 9
#> x y z binary_x binary_y binary_z binary_flipped_x
#> <dbl+lbl> <dbl+l> <dbl+l> <dbl> <dbl> <dbl> <dbl>
#> 1 3 [Somewhat agree] 2 [Agr… 3 [Som… 1 1 1 0
#> 2 4 [Somewhat disag… 4 [Som… 2 [Agr… 0 0 1 1
#> 3 2 [Agree] 6 [Str… 1 [Str… 1 0 1 0
#> 4 1 [Strongly agree] 1 [Str… 4 [Som… 1 1 0 0
#> 5 5 [Disagree] 4 [Som… 3 [Som… 0 0 1 1
#> 6 6 [Strongly disag… 5 [Dis… 6 [Str… 0 0 0 1
#> # ℹ 2 more variables: binary_flipped_y <dbl>, binary_flipped_z <dbl>
# show the underlying structure of the entire df; compare the "labels"
# attribute of binary_* and binary_flipped_* to see the effect of flipping
str(binary_df)
#> tibble [6 × 9] (S3: tbl_df/tbl/data.frame)
#> $ x : dbl+lbl [1:6] 3, 4, 2, 1, 5, 6
#> ..@ labels: Named num [1:6] 1 2 3 4 5 6
#> .. ..- attr(*, "names")= chr [1:6] "Strongly agree" "Agree" "Somewhat agree" "Somewhat disagree" ...
#> ..@ label : chr "This is the variable label for x"
#> $ y : dbl+lbl [1:6] 2, 4, 6, 1, 4, 5
#> ..@ labels: Named num [1:6] 1 2 3 4 5 6
#> .. ..- attr(*, "names")= chr [1:6] "Strongly agree" "Agree" "Somewhat agree" "Somewhat disagree" ...
#> ..@ label : chr "This is the variable label for y"
#> $ z : dbl+lbl [1:6] 3, 2, 1, 4, 3, 6
#> ..@ labels: Named num [1:6] 1 2 3 4 5 6
#> .. ..- attr(*, "names")= chr [1:6] "Strongly agree" "Agree" "Somewhat agree" "Somewhat disagree" ...
#> ..@ label : chr "This is the variable label for z"
#> $ binary_x : num [1:6] 1 0 1 1 0 0
#> ..- attr(*, "transformation")= 'glue' chr "Converting 'x' to a binary variable with 'Agree' = 1 and 'Disagree' = 0."
#> ..- attr(*, "label")= chr "This is the variable label for x"
#> ..- attr(*, "labels")= Named num [1:2] 1 0
#> .. ..- attr(*, "names")= chr [1:2] "Agree" "Disagree"
#> $ binary_y : num [1:6] 1 0 0 1 0 0
#> ..- attr(*, "transformation")= 'glue' chr "Converting 'y' to a binary variable with 'Agree' = 1 and 'Disagree' = 0."
#> ..- attr(*, "label")= chr "This is the variable label for y"
#> ..- attr(*, "labels")= Named num [1:2] 1 0
#> .. ..- attr(*, "names")= chr [1:2] "Agree" "Disagree"
#> $ binary_z : num [1:6] 1 1 1 0 1 0
#> ..- attr(*, "transformation")= 'glue' chr "Converting 'z' to a binary variable with 'Agree' = 1 and 'Disagree' = 0."
#> ..- attr(*, "label")= chr "This is the variable label for z"
#> ..- attr(*, "labels")= Named num [1:2] 1 0
#> .. ..- attr(*, "names")= chr [1:2] "Agree" "Disagree"
#> $ binary_flipped_x: num [1:6] 0 1 0 0 1 1
#> ..- attr(*, "transformation")= 'glue' chr "Converting 'x' to a binary variable with 'Disagree' = 1 and 'Agree' = 0."
#> ..- attr(*, "label")= chr "This is the variable label for x"
#> ..- attr(*, "labels")= Named num [1:2] 1 0
#> .. ..- attr(*, "names")= chr [1:2] "Disagree" "Agree"
#> $ binary_flipped_y: num [1:6] 0 1 1 0 1 1
#> ..- attr(*, "transformation")= 'glue' chr "Converting 'y' to a binary variable with 'Disagree' = 1 and 'Agree' = 0."
#> ..- attr(*, "label")= chr "This is the variable label for y"
#> ..- attr(*, "labels")= Named num [1:2] 1 0
#> .. ..- attr(*, "names")= chr [1:2] "Disagree" "Agree"
#> $ binary_flipped_z: num [1:6] 0 0 0 1 0 1
#> ..- attr(*, "transformation")= 'glue' chr "Converting 'z' to a binary variable with 'Disagree' = 1 and 'Agree' = 0."
#> ..- attr(*, "label")= chr "This is the variable label for z"
#> ..- attr(*, "labels")= Named num [1:2] 1 0
#> .. ..- attr(*, "names")= chr [1:2] "Disagree" "Agree"
# show how the value labels are flipped when `flip_values = TRUE`
# (the results are numeric vectors, so `levels()` would only return NULL;
# the "labels" attribute is what records which group is coded 1 and which 0)
attr(binary_df$binary_x, "labels")
#>    Agree Disagree
#>        1        0
attr(binary_df$binary_flipped_x, "labels")
#> Disagree    Agree
#>        1        0