Read in built-in datasets, import text and CSV files, and work with various delimiters and header layouts. For example, the heart dataset was exported from the built-in SAS library into a physical SAS dataset, which is then imported into R using the haven package. Another dataset, Bweight, contains birth weights for 50,000 babies, along with several variables believed to be related to birth weight, such as race (coded as black=1 or not black=0), mother’s smoking status (smoking=1 or non-smoking=0), and marital status (not married=0 or married=1).
R Code:
# if you wrote the SAS dataset with validvarname v6, you might need to fill in the mainframe style field names
# heart <- haven::read_sas("heart.sas7bdat") %>%
# rename(Status = STATUS,
# DeathCause = DEATHCAU, # Cause of Death
# AgeCHDdiag = AGECHDDI, # Age CHD Diagnosed
# Sex = SEX,
# AgeAtStart = AGEATSTA, # Age at Start
# Height = HEIGHT,
# Weight = WEIGHT,
# Diastolic = DIASTOLI,
# Systolic = SYSTOLIC,
# MRW = MRW, # Metropolitan Relative Weight
# Smoking = SMOKING,
# AgeAtDeath = AGEATDEA, # Age at Death
# Cholesterol = CHOLESTE,
# Chol_Status = CHOL_STA, # Cholesterol Status
# BP_Status = BP_STATU, # Blood Pressure Status
# Weight_Status = WEIGHT_S, # Weight Status
# Smoking_Status = SMOKING_)
# glimpse(heart)
# A SAS dataset written with validvarname v9 may keep mixed-case field names
# longer than 8 characters, so the file is imported without any renaming.
heart <- haven::read_sas(data_file = "heart_v9.sas7bdat")
heart %>%
  glimpse()
## Rows: 5,209
## Columns: 17
## $ Status <chr> "Dead", "Dead", "Alive", "Alive", "Alive", "Alive", "Al~
## $ DeathCause <chr> "Other", "Cancer", "", "", "", "", "", "Other", "", "Ce~
## $ AgeCHDdiag <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 57, 55, 79,~
## $ Sex <chr> "Female", "Female", "Female", "Female", "Male", "Female~
## $ AgeAtStart <dbl> 29, 41, 57, 39, 42, 58, 36, 53, 35, 52, 39, 33, 33, 57,~
## $ Height <dbl> 62.50, 59.75, 62.25, 65.75, 66.00, 61.75, 64.75, 65.50,~
## $ Weight <dbl> 140, 194, 132, 158, 156, 131, 136, 130, 194, 129, 179, ~
## $ Diastolic <dbl> 78, 92, 90, 80, 76, 92, 80, 80, 68, 78, 76, 68, 90, 76,~
## $ Systolic <dbl> 124, 144, 170, 128, 110, 176, 112, 114, 132, 124, 128, ~
## $ MRW <dbl> 121, 183, 114, 123, 116, 117, 110, 99, 124, 106, 133, 1~
## $ Smoking <dbl> 0, 0, 10, 0, 20, 0, 15, 0, 0, 5, 30, 0, 0, 15, 30, 10, ~
## $ AgeAtDeath <dbl> 55, 57, NA, NA, NA, NA, NA, 77, NA, 82, NA, NA, NA, NA,~
## $ Cholesterol <dbl> NA, 181, 250, 242, 281, 196, 196, 276, 211, 284, 225, 2~
## $ Chol_Status <chr> "", "Desirable", "High", "High", "High", "Desirable", "~
## $ BP_Status <chr> "Normal", "High", "High", "Normal", "Optimal", "High", ~
## $ Weight_Status <chr> "Overweight", "Overweight", "Overweight", "Overweight",~
## $ Smoking_Status <chr> "Non-smoker", "Non-smoker", "Moderate (6-15)", "Non-smo~
# Import the fish dataset from its physical SAS file and inspect its structure.
# Fixed: the structure check used to call glimpse(heart), so the newly imported
# `fish` object was never displayed.
# NOTE(review): the captured output that follows this call was produced by the
# old glimpse(heart) call and needs to be regenerated.
fish <- haven::read_sas("fish.sas7bdat")
glimpse(fish)
## Rows: 5,209
## Columns: 17
## $ Status <chr> "Dead", "Dead", "Alive", "Alive", "Alive", "Alive", "Al~
## $ DeathCause <chr> "Other", "Cancer", "", "", "", "", "", "Other", "", "Ce~
## $ AgeCHDdiag <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 57, 55, 79,~
## $ Sex <chr> "Female", "Female", "Female", "Female", "Male", "Female~
## $ AgeAtStart <dbl> 29, 41, 57, 39, 42, 58, 36, 53, 35, 52, 39, 33, 33, 57,~
## $ Height <dbl> 62.50, 59.75, 62.25, 65.75, 66.00, 61.75, 64.75, 65.50,~
## $ Weight <dbl> 140, 194, 132, 158, 156, 131, 136, 130, 194, 129, 179, ~
## $ Diastolic <dbl> 78, 92, 90, 80, 76, 92, 80, 80, 68, 78, 76, 68, 90, 76,~
## $ Systolic <dbl> 124, 144, 170, 128, 110, 176, 112, 114, 132, 124, 128, ~
## $ MRW <dbl> 121, 183, 114, 123, 116, 117, 110, 99, 124, 106, 133, 1~
## $ Smoking <dbl> 0, 0, 10, 0, 20, 0, 15, 0, 0, 5, 30, 0, 0, 15, 30, 10, ~
## $ AgeAtDeath <dbl> 55, 57, NA, NA, NA, NA, NA, 77, NA, 82, NA, NA, NA, NA,~
## $ Cholesterol <dbl> NA, 181, 250, 242, 281, 196, 196, 276, 211, 284, 225, 2~
## $ Chol_Status <chr> "", "Desirable", "High", "High", "High", "Desirable", "~
## $ BP_Status <chr> "Normal", "High", "High", "Normal", "Optimal", "High", ~
## $ Weight_Status <chr> "Overweight", "Overweight", "Overweight", "Overweight",~
## $ Smoking_Status <chr> "Non-smoker", "Non-smoker", "Moderate (6-15)", "Non-smo~
# Import the birth-weight dataset from its physical SAS file and show its
# column structure.
bweight <- haven::read_sas(data_file = "bweight.sas7bdat")
bweight %>%
  glimpse()
## Rows: 50,000
## Columns: 10
## $ Weight <dbl> 4111, 3997, 3572, 1956, 3515, 3757, 2977, 3884, 3629, 3062,~
## $ Black <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,~
## $ Married <dbl> 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,~
## $ Boy <dbl> 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,~
## $ MomAge <dbl> -3, 1, 0, -1, -6, 3, -5, -5, 6, -1, -2, -6, 0, 1, 1, 7, -4,~
## $ MomSmoke <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,~
## $ CigsPerDay <dbl> 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 10, 0, 0, 0, 0, 0, 0~
## $ MomWtGain <dbl> -16, 2, -3, -5, -20, 0, 5, 0, -5, 6, 22, -1, 7, -6, 10, 15,~
## $ Visit <dbl> 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,~
## $ MomEdLevel <dbl> 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1,~
# Test reading local files.
# Excel import: read the workbook and show its column structure.
glimpse(read_excel(path = "Grades.xlsx"))
## Rows: 3
## Columns: 8
## $ Name <chr> "Jones", "Hildebrand", "O'Brien"
## $ ID <dbl> 12345, 22222, 33333
## $ Quiz1 <dbl> 88, 95, 76
## $ Quiz2 <dbl> 80, 92, 78
## $ Midterm <dbl> 76, 91, 79
## $ Quiz3 <dbl> 88, 94, 81
## $ Quiz4 <dbl> 90, 90, 83
## $ Final <dbl> 82, 96, 80
# Excel import where the field names are not syntactic R names (spaces,
# leading digits); glimpse() displays such names in backticks.
glimpse(read_excel(path = "Grades2.xlsx"))
## Rows: 3
## Columns: 8
## $ `Stuent Name` <chr> "Jones", "Hildebrand", "O'Brien"
## $ ID <dbl> 12345, 22222, 33333
## $ `Quiz 1` <dbl> 88, 95, 76
## $ `Quiz 2` <dbl> 80, 92, 78
## $ `Mid Term` <dbl> 76, 91, 79
## $ `Quiz 3` <dbl> 88, 94, 81
## $ `Quiz 4` <dbl> 90, 90, 83
## $ `2015Final` <dbl> 82, 96, 80
# Excel import with non-syntactic field names, renamed on the fly to
# syntactic ones right after reading.
grades2_renamed <- rename(
  read_excel(path = "Grades2.xlsx"),
  Quiz_1 = `Quiz 1`,
  Quiz_2 = `Quiz 2`,
  Midterm = `Mid Term`,
  Quiz_3 = `Quiz 3`,
  Quiz_4 = `Quiz 4`,
  Final_2015 = `2015Final`
) %>%
  glimpse()
rm(grades2_renamed)
## Rows: 3
## Columns: 8
## $ `Stuent Name` <chr> "Jones", "Hildebrand", "O'Brien"
## $ ID <dbl> 12345, 22222, 33333
## $ Quiz_1 <dbl> 88, 95, 76
## $ Quiz_2 <dbl> 80, 92, 78
## $ Midterm <dbl> 76, 91, 79
## $ Quiz_3 <dbl> 88, 94, 81
## $ Quiz_4 <dbl> 90, 90, 83
## $ Final_2015 <dbl> 82, 96, 80
# CSV import: read the comma-delimited file and show its column structure.
glimpse(read_csv(file = "Grades.csv"))
## Rows: 3 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): Name
## dbl (7): ID, Quiz1, Quiz2, Midterm, Quiz3, Quiz4, Final
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3
## Columns: 8
## $ Name <chr> "Jones", "Hildebrand", "O'Brien"
## $ ID <dbl> 12345, 22222, 33333
## $ Quiz1 <dbl> 88, 95, 76
## $ Quiz2 <dbl> 80, 92, 78
## $ Midterm <dbl> 76, 91, 79
## $ Quiz3 <dbl> 88, 94, 81
## $ Quiz4 <dbl> 90, 90, 83
## $ Final <dbl> 82, 96, 80
# Space-delimited text import where runs of spaces count as one separator.
# read_delim() with delim = " " would trip up on the multiple spaces, so
# read_table() is used instead.
# A period is added to the NA strings, as per the SAS input text file.
# The file has no header row, so the default X1..X7 names are renamed.
# Fixed: col_names used the reassignable alias `F`; it is now `FALSE`.
read_table(
  "Health_List.txt",
  col_names = FALSE,
  na = c("", "NA", ".")
) %>%
  rename(
    Subj = X1,
    Gender = X2,
    Age = X3,
    Heart_Rate = X4,
    SBP = X5,
    DBP = X6,
    Chol = X7
  ) %>%
  glimpse()
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_double(),
## X4 = col_double(),
## X5 = col_double(),
## X6 = col_double(),
## X7 = col_double()
## )
## Rows: 6
## Columns: 7
## $ Subj <chr> "001", "002", "003", "004", "005", "006"
## $ Gender <chr> "M", "F", "F", "M", "F", "F"
## $ Age <dbl> 23, 55, 18, 80, 34, 38
## $ Heart_Rate <dbl> 68, 72, 58, 82, 62, 78
## $ SBP <dbl> 120, 180, 118, NA, 128, 108
## $ DBP <dbl> 90, 90, 72, NA, 80, 68
## $ Chol <dbl> 128, 170, 122, 220, NA, 220
# Fixed-width text import: each field is described by its width in characters,
# listed in the order the fields appear on a line.
read_fwf(
  "Health.txt",
  col_positions = fwf_widths(
    widths = c(3, 1, 2, 2, 3, 3, 3),
    col_names = c("Subj", "Gender", "Age", "Heart_Rate", "SBP", "DBP", "Chol")
  )
) %>%
  glimpse()
## Rows: 5 Columns: 7
## -- Column specification --------------------------------------------------------
##
## chr (2): Subj, Gender
## dbl (5): Age, Heart_Rate, SBP, DBP, Chol
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 5
## Columns: 7
## $ Subj <chr> "001", "002", "003", "004", "005"
## $ Gender <chr> "M", "F", "F", "M", "F"
## $ Age <dbl> 23, 55, 18, 80, 34
## $ Heart_Rate <dbl> 68, 72, 58, 82, 62
## $ SBP <dbl> 120, 180, 118, NA, 128
## $ DBP <dbl> 90, 90, 72, NA, 80
## $ Chol <dbl> 128, 170, 122, 220, NA
# Tab-delimited text import for a file without a header row; the default
# X1..X5 names are replaced with descriptive ones after reading.
# Fixed: col_names used the reassignable alias `F`; it is now `FALSE`.
read_tsv(
  "Blood_Pressure.txt",
  col_names = FALSE
) %>%
  rename(
    Drug = X1,
    Subj = X2,
    Gender = X3,
    SBP = X4,
    DBP = X5
  ) %>%
  glimpse()
## Rows: 60 Columns: 5
## -- Column specification --------------------------------------------------------
## Delimiter: "\t"
## chr (2): X1, X3
## dbl (3): X2, X4, X5
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 60
## Columns: 5
## $ Drug <chr> "Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo~
## $ Subj <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ~
## $ Gender <chr> "F", "M", "F", NA, "F", "M", "F", "F", "F", "M", "F", "F", "M",~
## $ SBP <dbl> 138, 124, 150, 136, NA, 132, 130, 146, 134, 138, 144, 130, 134,~
## $ DBP <dbl> 86, 82, 72, 84, NA, 84, 84, 88, 82, 88, 84, 88, 80, 90, NA, 88,~
# Import / transform external files for exercises in subsequent chapters.
# Chapter 5: Distribution Practice 5-3
blood_pressure <- read_excel(path = "Blood_Pressure.xlsx")
blood_pressure %>%
  glimpse()
## Rows: 60
## Columns: 5
## $ Drug <chr> "Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo~
## $ Subj <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ~
## $ Gender <chr> "F", "M", "F", NA, "F", "M", "F", "F", "F", "M", "F", "F", "M",~
## $ SBP <dbl> 138, 124, 150, 136, NA, 132, 130, 146, 134, 138, 144, 130, 134,~
## $ DBP <dbl> 86, 82, 72, 84, NA, 84, 84, 88, 82, 88, 84, 88, 80, 90, NA, 88,~
# Chapter 6: One-Sample Tests - Perch
# Read the perch workbook and show its column structure.
perch <- read_excel(path = "Perch.xlsx")
perch %>%
  glimpse()
## Rows: 56
## Columns: 3
## $ Weight <dbl> 5.9, 32.0, 40.0, 51.5, 70.0, 100.0, 78.0, 80.0, 85.0, 85.0, 110~
## $ Height <dbl> 2.1120, 3.5280, 3.8240, 4.5924, 4.5880, 5.2224, 5.1992, 5.6358,~
## $ Width <dbl> 1.4080, 1.9992, 2.4320, 2.6316, 2.9415, 3.3216, 3.1234, 3.0502,~
# Paired t Test - Yoga
# Read the yoga workbook and show its column structure.
yoga <- read_excel(path = "Yoga.xlsx")
yoga %>%
  glimpse()
## Rows: 9
## Columns: 3
## $ Subj <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9
## $ Before <dbl> 78, 68, 76, 58, 83, 80, 69, 77, 77
## $ After <dbl> 74, 68, 70, 57, 73, 77, 61, 76, 72
# Chapter 8: One-way ANOVA with test for Tukey multiple comparisons 8.6
# Three lines of ten readings are parsed from inline text, one line per group.
# The row number identifies the group and the default column name (X1..X10)
# supplies the subject number once the "X" prefix is removed.
# Fixed: col_names used the reassignable alias `F` (now `FALSE`), and the
# redundant as.character() around the string literal is dropped.
lvef <- "
55 58 62 48 57 57 80 40 55 52
57 65 55 78 57 84 72 80 78 81
60 60 65 67 48 62 64 70 57 40
" %>%
  read_table(col_names = FALSE, na = c("", "NA")) %>%
  tibble::rownames_to_column() %>%
  # one row per (group line, subject column) pair
  pivot_longer(-rowname) %>%
  mutate(
    Group = case_when(
      rowname == "1" ~ "Placebo",
      rowname == "2" ~ "Calcium",
      rowname == "3" ~ "Lasix",
      TRUE ~ rowname
    ),
    Subj = str_remove_all(name, "X")
  ) %>%
  rename(LVEF = value) %>%
  select(Group, Subj, LVEF)
glimpse(lvef)
## Rows: 30
## Columns: 3
## $ Group <chr> "Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo"~
## $ Subj <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "1", "2", "3"~
## $ LVEF <dbl> 55, 58, 62, 48, 57, 57, 80, 40, 55, 52, 57, 65, 55, 78, 57, 84, ~
# Chapter 9: create a 25 pct sample without replacement for Two-Way ANOVA.
# The SASHELP library contains the Bweight dataset containing birth weights
# for 50,000 babies, along with several variables believed to be related to
# birth weight, such as race (coded as black or not black), mother's smoking
# status (smoking or non-smoking), and marital status.
#   Weight   (in grams) is the dependent variable
#   Black    (0 = not black, 1 = black)
#   MomSmoke (0 = no, 1 = yes)
#   Married  (0 = no, 1 = yes)
set.seed(13579)
# Fixed: the sample size was computed by coercing a one-cell data frame
# (as.numeric(count(bweight) * .25)); it is now taken from nrow() and floored
# so that it is always a whole number of rows.
Birth_Wt_Sample <- bweight %>%
  slice_sample(n = floor(nrow(bweight) * 0.25), replace = FALSE)
summary(Birth_Wt_Sample)
## Weight Black Married Boy
## Min. : 240 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:3062 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :3402 Median :0.0000 Median :1.0000 Median :1.0000
## Mean :3368 Mean :0.1655 Mean :0.7129 Mean :0.5151
## 3rd Qu.:3714 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :5970 Max. :1.0000 Max. :1.0000 Max. :1.0000
## MomAge MomSmoke CigsPerDay MomWtGain
## Min. :-9.0000 Min. :0.0000 Min. : 0.000 Min. :-30.0000
## 1st Qu.:-4.0000 1st Qu.:0.0000 1st Qu.: 0.000 1st Qu.: -8.0000
## Median : 0.0000 Median :0.0000 Median : 0.000 Median : 0.0000
## Mean : 0.4449 Mean :0.1308 Mean : 1.482 Mean : 0.7021
## 3rd Qu.: 5.0000 3rd Qu.:0.0000 3rd Qu.: 0.000 3rd Qu.: 9.0000
## Max. :18.0000 Max. :1.0000 Max. :40.000 Max. : 68.0000
## Visit MomEdLevel
## Min. :0.000 Min. :0.000
## 1st Qu.:3.000 1st Qu.:0.000
## Median :3.000 Median :1.000
## Mean :2.701 Mean :1.218
## 3rd Qu.:3.000 3rd Qu.:2.000
## Max. :3.000 Max. :3.000
# writexl::write_xlsx(Birth_Wt_Sample, "Birth_Wt_Sample_R.xlsx")
set.seed(123456)
# The R sample drawn above does not match the equivalent SAS proc surveyselect
# run with the same parameters and random seed, so the sample exported from SAS
# is imported here instead.
# NOTE(review): the original note mentions redoing the extract "using LOJ" to
# the SAS export; only the direct Excel import is visible in this file.
Birth_Wt_Sample_SAS <- read_excel(path = "Birth_Wt_Sample.xlsx")
Birth_Wt_Sample_SAS %>%
  summary()
## Weight Black Married Boy
## Min. : 322 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:3062 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :3402 Median :0.0000 Median :1.0000 Median :1.0000
## Mean :3373 Mean :0.1578 Mean :0.7095 Mean :0.5106
## 3rd Qu.:3714 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :5970 Max. :1.0000 Max. :1.0000 Max. :1.0000
## MomAge MomSmoke CigsPerDay MomWtGain
## Min. :-9.0000 Min. :0.0000 Min. : 0.000 Min. :-30.0000
## 1st Qu.:-4.0000 1st Qu.:0.0000 1st Qu.: 0.000 1st Qu.: -8.0000
## Median : 0.0000 Median :0.0000 Median : 0.000 Median : 0.0000
## Mean : 0.3814 Mean :0.1319 Mean : 1.479 Mean : 0.6146
## 3rd Qu.: 5.0000 3rd Qu.:0.0000 3rd Qu.: 0.000 3rd Qu.: 9.0000
## Max. :18.0000 Max. :1.0000 Max. :40.000 Max. : 68.0000
## Visit MomEdLevel
## Min. :0.000 Min. :0.00
## 1st Qu.:3.000 1st Qu.:0.00
## Median :3.000 Median :1.00
## Mean :2.699 Mean :1.22
## 3rd Qu.:3.000 3rd Qu.:2.00
## Max. :3.000 Max. :3.00
# Chapter 10: Correlation - Exercise
# Read the exercise workbook (legacy .xls) and show its column structure.
exercise <- read_excel(path = "Exercise.xls")
exercise %>%
  glimpse()
## Rows: 50
## Columns: 6
## $ Subj <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ~
## $ Age <dbl> 68, 64, 76, 44, 55, 57, 64, 30, 35, 49, 63, 63, 19, 51, 54,~
## $ Pushups <dbl> 19, 36, 11, 35, 24, 14, 21, 48, 25, 9, 51, 30, 34, 23, 7, 4~
## $ Rest_Pulse <dbl> 75, 61, 74, 59, 76, 74, 69, 60, 55, 88, 55, 73, 65, 66, 59,~
## $ Max_Pulse <dbl> 124, 107, 115, 111, 115, 121, 106, 114, 107, 137, 102, 126,~
## $ Run_Pulse <dbl> 121, 110, 105, 108, 110, 118, 103, 110, 107, 134, 103, 125,~
# Chapter 12: Binary Logistic Regression
# Derive a categorical weight variable from the previously created sample,
# using the arbitrary median cutoff of 3402 grams. Rows with a missing Weight
# are dropped first; Wt_Group is 1 below the cutoff and 0 otherwise.
high_low <- Birth_Wt_Sample_SAS %>%
  filter(!is.na(Weight)) %>%
  mutate(Wt_Group = if_else(Weight < 3402, 1, 0))
high_low %>%
  glimpse()
## Rows: 12,500
## Columns: 11
## $ Weight <dbl> 3430, 3657, 4054, 4536, 3295, 3458, 3714, 2807, 3625, 3884,~
## $ Black <dbl> 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ Married <dbl> 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1,~
## $ Boy <dbl> 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0,~
## $ MomAge <dbl> -4, 6, -5, 3, 6, 8, 2, -5, 2, -2, -6, 6, 7, -8, -1, -9, -9,~
## $ MomSmoke <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,~
## $ CigsPerDay <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 4~
## $ MomWtGain <dbl> -6, 15, 21, -1, -29, -18, 25, 13, 0, -10, 12, -9, 7, -7, 23~
## $ Visit <dbl> 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,~
## $ MomEdLevel <dbl> 0, 0, 2, 3, 2, 1, 1, 0, 0, 0, 0, 0, 1, 0, 2, 0, 3, 2, 2, 3,~
## $ Wt_Group <dbl> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0,~
# Chapter 13: Prepare generated dataset Heart_Attack
# Each coded column gets a companion label column with a trailing underscore.
# Any value outside the listed codes is carried over as its character form.
Heart_Attack <- read_excel(path = "Heart_Attack.xlsx") %>%
  mutate(
    Gender_ = case_when(
      Gender == "F" ~ "Female",
      Gender == "M" ~ "Male",
      TRUE ~ Gender
    ),
    High_Chol_ = case_when(
      High_Chol == 0 ~ "No",
      High_Chol == 1 ~ "Yes",
      TRUE ~ as.character(High_Chol)
    ),
    Heart_Attack_ = case_when(
      Heart_Attack == 0 ~ "No",
      Heart_Attack == 1 ~ "Yes",
      TRUE ~ as.character(Heart_Attack)
    ),
    Age_Group_ = case_when(
      Age_Group == 1 ~ "< 60",
      Age_Group == 2 ~ "60-70",
      Age_Group == 3 ~ "71+",
      TRUE ~ as.character(Age_Group)
    )
  )
Heart_Attack %>%
  glimpse()
## Rows: 500
## Columns: 10
## $ Gender <chr> "F", "M", "F", "M", "F", "M", "F", "M", "F", "M", "F", "~
## $ Age <dbl> 63, 69, 69, 59, 71, 50, 57, 75, 60, 52, 80, 54, 75, 57, ~
## $ Age_Group <dbl> 2, 2, 2, 1, 3, 1, 1, 3, 2, 1, 3, 1, 3, 1, 2, 2, 2, 2, 1,~
## $ Chol <dbl> 211, 249, 139, 239, 195, 193, 179, 186, 164, 237, 178, 2~
## $ High_Chol <dbl> 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,~
## $ Heart_Attack <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,~
## $ Gender_ <chr> "Female", "Male", "Female", "Male", "Female", "Male", "F~
## $ High_Chol_ <chr> "Yes", "Yes", "No", "Yes", "No", "No", "No", "No", "No",~
## $ Heart_Attack_ <chr> "No", "No", "No", "No", "No", "No", "No", "Yes", "No", "~
## $ Age_Group_ <chr> "60-70", "60-70", "60-70", "< 60", "71+", "< 60", "< 60"~