# birthwt dataset from the MASS package for analysis
library(tidyverse)
library(MASS)
library(grid)
library(gridExtra)
# birthwt dataset from the MASS package for analysis
`birthwt` is a data.frame of 189 observations and 10 variables.
The variables are described below.
# Load the birthwt data set that ships with MASS and show its structure.
data("birthwt", package = "MASS")
str(birthwt)
## 'data.frame': 189 obs. of 10 variables:
## $ low : int 0 0 0 0 0 0 0 0 0 0 ...
## $ age : int 19 33 20 21 18 21 22 17 29 26 ...
## $ lwt : int 182 155 105 108 107 124 118 103 123 113 ...
## $ race : int 2 3 1 1 1 3 1 3 1 1 ...
## $ smoke: int 0 0 1 1 1 0 0 0 1 1 ...
## $ ptl : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ht : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ui : int 1 0 0 1 1 0 0 0 0 0 ...
## $ ftv : int 0 3 1 2 0 0 1 1 1 0 ...
## $ bwt : int 2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...
| # | variable name | variable label | coded levels |
|---|---|---|---|
| 1 | low | indicator of birth weight less than 2.5 kg | 0, 1 |
| 2 | age | mother’s age in years | continuous variable |
| 3 | lwt | mother’s weight in pounds at last menstrual period | continuous variable |
| 4 | race | mother’s race (1 = white, 2 = black, 3 = other) | 1, 2, 3 |
| 5 | smoke | smoking status during pregnancy | 0, 1 |
| 6 | ptl | number of previous premature labours | 0, 1, 2, 3 |
| 7 | ht | history of hypertension | 0, 1 |
| 8 | ui | presence of uterine irritability | 0, 1 |
| 9 | ftv | number of physician visits during the first trimester | 0, 1, 2, 3, 4, 6 |
| 10 | bwt | birth weight in grams | continuous variable |
Analyze the relationship between the outcome variable (i.e., low) and the other (categorical and continuous) variables.
# Round-trip birthwt through a CSV file so it is re-read as a tibble.
# The write and the read use one shared path, so the script also runs when no
# earlier copy of the file exists under datalib/.
csv_path <- file.path("datalib", "birthwt.csv")
# Make sure the target directory exists before writing into it.
dir.create(dirname(csv_path), showWarnings = FALSE, recursive = TRUE)
write_csv(birthwt, csv_path)
tbwt <- read_csv(csv_path)
str(tbwt)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 189 obs. of 10 variables:
## $ low : num 0 0 0 0 0 0 0 0 0 0 ...
## $ age : num 19 33 20 21 18 21 22 17 29 26 ...
## $ lwt : num 182 155 105 108 107 124 118 103 123 113 ...
## $ race : num 2 3 1 1 1 3 1 3 1 1 ...
## $ smoke: num 0 0 1 1 1 0 0 0 1 1 ...
## $ ptl : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ht : num 0 0 0 0 0 0 0 0 0 0 ...
## $ ui : num 1 0 0 1 1 0 0 0 0 0 ...
## $ ftv : num 0 3 1 2 0 0 1 1 1 0 ...
## $ bwt : num 2523 2551 2557 2594 2600 ...
## - attr(*, "spec")=
## .. cols(
## .. low = col_double(),
## .. age = col_double(),
## .. lwt = col_double(),
## .. race = col_double(),
## .. smoke = col_double(),
## .. ptl = col_double(),
## .. ht = col_double(),
## .. ui = col_double(),
## .. ftv = col_double(),
## .. bwt = col_double()
## .. )
# Convert the seven categorical columns to factors in one pass; the columns
# not listed here (age, lwt, bwt) are left untouched.
factor_cols <- c("low", "race", "smoke", "ptl", "ht", "ui", "ftv")
tbwt <- tbwt %>%
  mutate(across(all_of(factor_cols), as_factor))
head(tbwt)
| low | age | lwt | race | smoke | ptl | ht | ui | ftv | bwt |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 19 | 182 | 2 | 0 | 0 | 0 | 1 | 0 | 2523 |
| 0 | 33 | 155 | 3 | 0 | 0 | 0 | 0 | 3 | 2551 |
| 0 | 20 | 105 | 1 | 1 | 0 | 0 | 0 | 1 | 2557 |
| 0 | 21 | 108 | 1 | 1 | 0 | 0 | 1 | 2 | 2594 |
| 0 | 18 | 107 | 1 | 1 | 0 | 0 | 1 | 0 | 2600 |
| 0 | 21 | 124 | 3 | 0 | 0 | 0 | 0 | 0 | 2622 |
# Drop the birth-weight column by name rather than by position, so the step
# keeps working if the column order ever changes.
# NOTE(review): per the variable table, low flags birth weight below 2.5 kg,
# so bwt presumably is removed because it determines the outcome -- confirm.
# dplyr:: is spelled out because MASS is attached after tidyverse and also
# exports a select().
tbwt <- tbwt %>%
  dplyr::select(-bwt)
head(tbwt)
| low | age | lwt | race | smoke | ptl | ht | ui | ftv |
|---|---|---|---|---|---|---|---|---|
| 0 | 19 | 182 | 2 | 0 | 0 | 0 | 1 | 0 |
| 0 | 33 | 155 | 3 | 0 | 0 | 0 | 0 | 3 |
| 0 | 20 | 105 | 1 | 1 | 0 | 0 | 0 | 1 |
| 0 | 21 | 108 | 1 | 1 | 0 | 0 | 1 | 2 |
| 0 | 18 | 107 | 1 | 1 | 0 | 0 | 1 | 0 |
| 0 | 21 | 124 | 3 | 0 | 0 | 0 | 0 | 0 |
# Bar plots for low, race and smoke: each row shows raw counts on the left
# and proportions on the right.
# after_stat(prop) replaces the deprecated ..prop.. notation. group = 1 puts
# all bars in one group so the proportion is taken over the whole data set
# instead of within each bar.
cat_plots <- list(
  ggplot(tbwt, aes(x = low, fill = low)) +
    geom_bar(),
  ggplot(tbwt, aes(x = low, y = after_stat(prop), group = 1)) +
    geom_bar(),
  ggplot(tbwt, aes(x = race, fill = race)) +
    geom_bar(),
  ggplot(tbwt, aes(x = race, y = after_stat(prop), group = 1)) +
    geom_bar(),
  ggplot(tbwt, aes(x = smoke, fill = smoke)) +
    geom_bar(),
  ggplot(tbwt, aes(x = smoke, y = after_stat(prop), group = 1)) +
    geom_bar()
)
grid.arrange(
  grobs = cat_plots,
  nrow = 3,
  top = "Bar plots for categorical variables"
)
# identity, fill and dodge method
# The same low-by-race bar chart drawn three times, once per position
# adjustment, so the adjustments can be compared side by side.
low_by_race <- ggplot(data = tbwt, mapping = aes(x = low, fill = race))
grid.arrange(
  low_by_race +
    geom_bar(position = "identity"),
  # With position = "fill" the y axis no longer shows counts, hence the
  # relabelled axis.
  low_by_race +
    geom_bar(position = "fill") +
    scale_y_continuous(name = "Prop"),
  low_by_race +
    geom_bar(position = "dodge"),
  nrow = 1,
  top = 'Bar plots with "identity", "fill" and "dodge" position '
)
# identity, fill and dodge method, with flipped coordinates
# All three panels share the flipped axes and the legend placed on top, so
# those parts are defined once on the base plot.
low_by_ftv <- ggplot(data = tbwt, mapping = aes(x = low, fill = ftv)) +
  coord_flip() +
  theme(legend.position = "top")
grid.arrange(
  low_by_ftv +
    geom_bar(position = "identity"),
  # With position = "fill" the bar-length axis no longer shows counts, hence
  # the relabelled axis.
  low_by_ftv +
    geom_bar(position = "fill") +
    scale_y_continuous(name = "Prop"),
  low_by_ftv +
    geom_bar(position = "dodge"),
  nrow = 1,
  top = "Bar plots with flipped coordinates"
)
Keep an eye on the y-axis.
# Each row pairs the plain count plot of low (left) with a version whose bars
# are weighted by a numeric column (right): age in the first row, lwt in the
# second. The y axis of the weighted plots is labelled with that column.
base_low <- ggplot(tbwt, aes(x = low, fill = low))
count_bars <- base_low + geom_bar()
grid.arrange(
  count_bars,
  base_low +
    geom_bar(aes(weight = age)) +
    scale_y_continuous(name = "age"),
  count_bars,
  base_low +
    geom_bar(aes(weight = lwt)) +
    scale_y_continuous(name = "lwt"),
  nrow = 2,
  top = "Weighted vs non-weighted bar plots"
)