birthwt(Risk Factors Associated with Low Infant Birth Weight)
Description
The birthwt data frame has 189 rows and 10 columns. The data were collected at Baystate Medical Center, Springfield, Mass during 1986.
In this R project I will use the birthwt dataset to see if there is any relationship between a mother smoking during pregnancy and her baby being underweight at birth.
required packages
require(ggplot2)
## Loading required package: ggplot2
library(ggplot2)
require(RCurl)
## Loading required package: RCurl
## Loading required package: bitops
library(RCurl)
pulling data
# Download the raw CSV text from GitHub with RCurl.
url <- getURL(
  "https://raw.githubusercontent.com/choudhury1023/CUNY-MSDA-Bridge-Workshop-R/master/birthwt.csv"
)
# Parse the downloaded text and keep only columns 2, 6 and 11 of the table;
# these are the three columns that get descriptive names in the next step.
birthwt <- read.csv(text = url)[, c(2, 6, 11)]
rename columns
# Give the three retained columns descriptive names, then preview the first
# six rows to confirm the result.
birthwt <- setNames(birthwt, c("under_weight", "smoker", "birth_weight"))
head(birthwt, n = 6L)
## under_weight smoker birth_weight
## 1 0 0 2523
## 2 0 0 2551
## 3 0 1 2557
## 4 0 1 2594
## 5 0 1 2600
## 6 0 0 2622
change values
# Recode the 0/1 indicator columns as labelled factors.
# The levels are declared explicitly so that both "No" and "Yes" always exist
# (in that order) even if one of them never occurs in the data, and so that
# any value other than 0 or 1 becomes NA instead of shifting or dropping
# elements, which the previous index-arithmetic lookup could do.
birthwt$smoker <- factor(
  birthwt$smoker,
  levels = c(0, 1),
  labels = c("No", "Yes")
)
birthwt$under_weight <- factor(
  birthwt$under_weight,
  levels = c(0, 1),
  labels = c("No", "Yes")
)
# Counts per level for the factors, five-number summary plus mean for weight.
summary(birthwt)
## under_weight smoker birth_weight
## No :130 No :115 Min. : 709
## Yes: 59 Yes: 74 1st Qu.:2414
## Median :2977
## Mean :2945
## 3rd Qu.:3487
## Max. :4990
boxplot for birth weight
# Box plot of birth weight with one box per smoking status.
# `smoker` is mapped to the x axis so every box is labelled with its group
# (rather than all boxes sharing a constant x position of 1), and it is used
# directly for the fill because the column has already been converted to a
# factor, so wrapping it in factor() again is unnecessary.
ggplot(birthwt, aes(x = smoker, y = birth_weight, fill = smoker)) +
  geom_boxplot()

histogram for birth weight distribution
# Histogram of birth weight with a bin width of 100, filled by smoking status.
ggplot(data = birthwt, mapping = aes(x = birth_weight, fill = smoker)) +
  geom_histogram(binwidth = 100)

add column to identify rows containing smoker mother and under weight child
# Flag the rows where the mother smoked AND the baby was under weight, then
# print the whole data frame so the new logical column can be inspected.
birthwt[["smokerunderwt"]] <- with(
  birthwt,
  smoker == "Yes" & under_weight == "Yes"
)
print(birthwt)
## under_weight smoker birth_weight smokerunderwt
## 1 No No 2523 FALSE
## 2 No No 2551 FALSE
## 3 No Yes 2557 FALSE
## 4 No Yes 2594 FALSE
## 5 No Yes 2600 FALSE
## 6 No No 2622 FALSE
## 7 No No 2637 FALSE
## 8 No No 2637 FALSE
## 9 No Yes 2663 FALSE
## 10 No Yes 2665 FALSE
## 11 No No 2722 FALSE
## 12 No No 2733 FALSE
## 13 No No 2751 FALSE
## 14 No No 2750 FALSE
## 15 No Yes 2769 FALSE
## 16 No Yes 2769 FALSE
## 17 No No 2778 FALSE
## 18 No Yes 2782 FALSE
## 19 No No 2807 FALSE
## 20 No Yes 2821 FALSE
## 21 No No 2835 FALSE
## 22 No No 2835 FALSE
## 23 No No 2836 FALSE
## 24 No No 2863 FALSE
## 25 No No 2877 FALSE
## 26 No No 2877 FALSE
## 27 No Yes 2906 FALSE
## 28 No No 2920 FALSE
## 29 No Yes 2920 FALSE
## 30 No No 2920 FALSE
## 31 No No 2920 FALSE
## 32 No Yes 2948 FALSE
## 33 No Yes 2948 FALSE
## 34 No No 2977 FALSE
## 35 No No 2977 FALSE
## 36 No Yes 2977 FALSE
## 37 No Yes 2977 FALSE
## 38 No Yes 2922 FALSE
## 39 No Yes 3005 FALSE
## 40 No Yes 3033 FALSE
## 41 No Yes 3042 FALSE
## 42 No No 3062 FALSE
## 43 No No 3062 FALSE
## 44 No No 3062 FALSE
## 45 No Yes 3062 FALSE
## 46 No Yes 3062 FALSE
## 47 No No 3080 FALSE
## 48 No No 3090 FALSE
## 49 No No 3090 FALSE
## 50 No Yes 3090 FALSE
## 51 No No 3100 FALSE
## 52 No No 3104 FALSE
## 53 No Yes 3132 FALSE
## 54 No Yes 3147 FALSE
## 55 No No 3175 FALSE
## 56 No No 3175 FALSE
## 57 No Yes 3203 FALSE
## 58 No No 3203 FALSE
## 59 No No 3203 FALSE
## 60 No No 3225 FALSE
## 61 No No 3225 FALSE
## 62 No No 3232 FALSE
## 63 No No 3232 FALSE
## 64 No No 3234 FALSE
## 65 No Yes 3260 FALSE
## 66 No No 3274 FALSE
## 67 No No 3274 FALSE
## 68 No Yes 3303 FALSE
## 69 No No 3317 FALSE
## 70 No No 3317 FALSE
## 71 No Yes 3317 FALSE
## 72 No Yes 3321 FALSE
## 73 No Yes 3331 FALSE
## 74 No No 3374 FALSE
## 75 No Yes 3374 FALSE
## 76 No No 3402 FALSE
## 77 No No 3416 FALSE
## 78 No Yes 3430 FALSE
## 79 No Yes 3444 FALSE
## 80 No No 3459 FALSE
## 81 No No 3460 FALSE
## 82 No No 3473 FALSE
## 83 No No 3544 FALSE
## 84 No No 3487 FALSE
## 85 No No 3544 FALSE
## 86 No Yes 3572 FALSE
## 87 No No 3572 FALSE
## 88 No No 3586 FALSE
## 89 No No 3600 FALSE
## 90 No No 3614 FALSE
## 91 No No 3614 FALSE
## 92 No No 3629 FALSE
## 93 No Yes 3629 FALSE
## 94 No Yes 3637 FALSE
## 95 No Yes 3643 FALSE
## 96 No No 3651 FALSE
## 97 No No 3651 FALSE
## 98 No Yes 3651 FALSE
## 99 No Yes 3651 FALSE
## 100 No No 3699 FALSE
## 101 No No 3728 FALSE
## 102 No Yes 3756 FALSE
## 103 No No 3770 FALSE
## 104 No No 3770 FALSE
## 105 No No 3770 FALSE
## 106 No No 3790 FALSE
## 107 No No 3799 FALSE
## 108 No No 3827 FALSE
## 109 No Yes 3856 FALSE
## 110 No No 3860 FALSE
## 111 No No 3860 FALSE
## 112 No No 3884 FALSE
## 113 No Yes 3884 FALSE
## 114 No No 3912 FALSE
## 115 No Yes 3940 FALSE
## 116 No No 3941 FALSE
## 117 No No 3941 FALSE
## 118 No No 3969 FALSE
## 119 No No 3983 FALSE
## 120 No No 3997 FALSE
## 121 No No 3997 FALSE
## 122 No No 4054 FALSE
## 123 No No 4054 FALSE
## 124 No No 4111 FALSE
## 125 No No 4153 FALSE
## 126 No No 4167 FALSE
## 127 No No 4174 FALSE
## 128 No Yes 4238 FALSE
## 129 No No 4593 FALSE
## 130 No No 4990 FALSE
## 131 Yes Yes 709 TRUE
## 132 Yes No 1021 FALSE
## 133 Yes Yes 1135 TRUE
## 134 Yes No 1330 FALSE
## 135 Yes No 1474 FALSE
## 136 Yes No 1588 FALSE
## 137 Yes No 1588 FALSE
## 138 Yes No 1701 FALSE
## 139 Yes No 1729 FALSE
## 140 Yes Yes 1790 TRUE
## 141 Yes Yes 1818 TRUE
## 142 Yes Yes 1885 TRUE
## 143 Yes No 1893 FALSE
## 144 Yes No 1899 FALSE
## 145 Yes Yes 1928 TRUE
## 146 Yes Yes 1928 TRUE
## 147 Yes No 1928 FALSE
## 148 Yes Yes 1936 TRUE
## 149 Yes No 1970 FALSE
## 150 Yes No 2055 FALSE
## 151 Yes No 2055 FALSE
## 152 Yes No 2082 FALSE
## 153 Yes Yes 2084 TRUE
## 154 Yes Yes 2084 TRUE
## 155 Yes No 2100 FALSE
## 156 Yes Yes 2125 TRUE
## 157 Yes Yes 2126 TRUE
## 158 Yes Yes 2187 TRUE
## 159 Yes No 2187 FALSE
## 160 Yes Yes 2211 TRUE
## 161 Yes Yes 2225 TRUE
## 162 Yes No 2240 FALSE
## 163 Yes No 2240 FALSE
## 164 Yes No 2282 FALSE
## 165 Yes Yes 2296 TRUE
## 166 Yes Yes 2296 TRUE
## 167 Yes No 2301 FALSE
## 168 Yes No 2325 FALSE
## 169 Yes Yes 2353 TRUE
## 170 Yes No 2353 FALSE
## 171 Yes Yes 2367 TRUE
## 172 Yes Yes 2381 TRUE
## 173 Yes Yes 2381 TRUE
## 174 Yes No 2381 FALSE
## 175 Yes No 2410 FALSE
## 176 Yes Yes 2410 TRUE
## 177 Yes Yes 2410 TRUE
## 178 Yes Yes 2414 TRUE
## 179 Yes Yes 2424 TRUE
## 180 Yes No 2438 FALSE
## 181 Yes No 2442 FALSE
## 182 Yes No 2450 FALSE
## 183 Yes Yes 2466 TRUE
## 184 Yes Yes 2466 TRUE
## 185 Yes Yes 2466 TRUE
## 186 Yes No 2495 FALSE
## 187 Yes Yes 2495 TRUE
## 188 Yes No 2495 FALSE
## 189 Yes Yes 2495 TRUE
keeping only the rows for smokers (filtering out the non-smoker rows)
# Logical mask that is TRUE for rows where the mother smoked.
smokerwt <- birthwt[["smoker"]] == "Yes"
# S holds only the smoker rows, with every column kept.
S <- birthwt[smokerwt, , drop = FALSE]
print(S)
## under_weight smoker birth_weight smokerunderwt
## 3 No Yes 2557 FALSE
## 4 No Yes 2594 FALSE
## 5 No Yes 2600 FALSE
## 9 No Yes 2663 FALSE
## 10 No Yes 2665 FALSE
## 15 No Yes 2769 FALSE
## 16 No Yes 2769 FALSE
## 18 No Yes 2782 FALSE
## 20 No Yes 2821 FALSE
## 27 No Yes 2906 FALSE
## 29 No Yes 2920 FALSE
## 32 No Yes 2948 FALSE
## 33 No Yes 2948 FALSE
## 36 No Yes 2977 FALSE
## 37 No Yes 2977 FALSE
## 38 No Yes 2922 FALSE
## 39 No Yes 3005 FALSE
## 40 No Yes 3033 FALSE
## 41 No Yes 3042 FALSE
## 45 No Yes 3062 FALSE
## 46 No Yes 3062 FALSE
## 50 No Yes 3090 FALSE
## 53 No Yes 3132 FALSE
## 54 No Yes 3147 FALSE
## 57 No Yes 3203 FALSE
## 65 No Yes 3260 FALSE
## 68 No Yes 3303 FALSE
## 71 No Yes 3317 FALSE
## 72 No Yes 3321 FALSE
## 73 No Yes 3331 FALSE
## 75 No Yes 3374 FALSE
## 78 No Yes 3430 FALSE
## 79 No Yes 3444 FALSE
## 86 No Yes 3572 FALSE
## 93 No Yes 3629 FALSE
## 94 No Yes 3637 FALSE
## 95 No Yes 3643 FALSE
## 98 No Yes 3651 FALSE
## 99 No Yes 3651 FALSE
## 102 No Yes 3756 FALSE
## 109 No Yes 3856 FALSE
## 113 No Yes 3884 FALSE
## 115 No Yes 3940 FALSE
## 128 No Yes 4238 FALSE
## 131 Yes Yes 709 TRUE
## 133 Yes Yes 1135 TRUE
## 140 Yes Yes 1790 TRUE
## 141 Yes Yes 1818 TRUE
## 142 Yes Yes 1885 TRUE
## 145 Yes Yes 1928 TRUE
## 146 Yes Yes 1928 TRUE
## 148 Yes Yes 1936 TRUE
## 153 Yes Yes 2084 TRUE
## 154 Yes Yes 2084 TRUE
## 156 Yes Yes 2125 TRUE
## 157 Yes Yes 2126 TRUE
## 158 Yes Yes 2187 TRUE
## 160 Yes Yes 2211 TRUE
## 161 Yes Yes 2225 TRUE
## 165 Yes Yes 2296 TRUE
## 166 Yes Yes 2296 TRUE
## 169 Yes Yes 2353 TRUE
## 171 Yes Yes 2367 TRUE
## 172 Yes Yes 2381 TRUE
## 173 Yes Yes 2381 TRUE
## 176 Yes Yes 2410 TRUE
## 177 Yes Yes 2410 TRUE
## 178 Yes Yes 2414 TRUE
## 179 Yes Yes 2424 TRUE
## 183 Yes Yes 2466 TRUE
## 184 Yes Yes 2466 TRUE
## 185 Yes Yes 2466 TRUE
## 187 Yes Yes 2495 TRUE
## 189 Yes Yes 2495 TRUE
# Per-column summary of S, the subset of rows for smoking mothers.
summary(S)
## under_weight smoker birth_weight smokerunderwt
## No :44 No : 0 Min. : 709 Mode :logical
## Yes:30 Yes:74 1st Qu.:2370 FALSE:44
## Median :2776 TRUE :30
## Mean :2772 NA's :0
## 3rd Qu.:3246
## Max. :4238
find smoker mean weight manually
# Mean birth weight over the rows where the mother smoked. which() turns the
# comparison into row positions and leaves out any row where it is NA.
smean <- mean(birthwt$birth_weight[which(birthwt$smoker == "Yes")])
print(smean)
## [1] 2771.919
keeping only the rows for non-smokers (filtering out the smoker rows)
# Logical mask that is TRUE for rows where the mother did not smoke.
nonsmokerwt <- birthwt[["smoker"]] == "No"
# N holds only the non-smoker rows, with every column kept.
N <- birthwt[nonsmokerwt, , drop = FALSE]
print(N)
## under_weight smoker birth_weight smokerunderwt
## 1 No No 2523 FALSE
## 2 No No 2551 FALSE
## 6 No No 2622 FALSE
## 7 No No 2637 FALSE
## 8 No No 2637 FALSE
## 11 No No 2722 FALSE
## 12 No No 2733 FALSE
## 13 No No 2751 FALSE
## 14 No No 2750 FALSE
## 17 No No 2778 FALSE
## 19 No No 2807 FALSE
## 21 No No 2835 FALSE
## 22 No No 2835 FALSE
## 23 No No 2836 FALSE
## 24 No No 2863 FALSE
## 25 No No 2877 FALSE
## 26 No No 2877 FALSE
## 28 No No 2920 FALSE
## 30 No No 2920 FALSE
## 31 No No 2920 FALSE
## 34 No No 2977 FALSE
## 35 No No 2977 FALSE
## 42 No No 3062 FALSE
## 43 No No 3062 FALSE
## 44 No No 3062 FALSE
## 47 No No 3080 FALSE
## 48 No No 3090 FALSE
## 49 No No 3090 FALSE
## 51 No No 3100 FALSE
## 52 No No 3104 FALSE
## 55 No No 3175 FALSE
## 56 No No 3175 FALSE
## 58 No No 3203 FALSE
## 59 No No 3203 FALSE
## 60 No No 3225 FALSE
## 61 No No 3225 FALSE
## 62 No No 3232 FALSE
## 63 No No 3232 FALSE
## 64 No No 3234 FALSE
## 66 No No 3274 FALSE
## 67 No No 3274 FALSE
## 69 No No 3317 FALSE
## 70 No No 3317 FALSE
## 74 No No 3374 FALSE
## 76 No No 3402 FALSE
## 77 No No 3416 FALSE
## 80 No No 3459 FALSE
## 81 No No 3460 FALSE
## 82 No No 3473 FALSE
## 83 No No 3544 FALSE
## 84 No No 3487 FALSE
## 85 No No 3544 FALSE
## 87 No No 3572 FALSE
## 88 No No 3586 FALSE
## 89 No No 3600 FALSE
## 90 No No 3614 FALSE
## 91 No No 3614 FALSE
## 92 No No 3629 FALSE
## 96 No No 3651 FALSE
## 97 No No 3651 FALSE
## 100 No No 3699 FALSE
## 101 No No 3728 FALSE
## 103 No No 3770 FALSE
## 104 No No 3770 FALSE
## 105 No No 3770 FALSE
## 106 No No 3790 FALSE
## 107 No No 3799 FALSE
## 108 No No 3827 FALSE
## 110 No No 3860 FALSE
## 111 No No 3860 FALSE
## 112 No No 3884 FALSE
## 114 No No 3912 FALSE
## 116 No No 3941 FALSE
## 117 No No 3941 FALSE
## 118 No No 3969 FALSE
## 119 No No 3983 FALSE
## 120 No No 3997 FALSE
## 121 No No 3997 FALSE
## 122 No No 4054 FALSE
## 123 No No 4054 FALSE
## 124 No No 4111 FALSE
## 125 No No 4153 FALSE
## 126 No No 4167 FALSE
## 127 No No 4174 FALSE
## 129 No No 4593 FALSE
## 130 No No 4990 FALSE
## 132 Yes No 1021 FALSE
## 134 Yes No 1330 FALSE
## 135 Yes No 1474 FALSE
## 136 Yes No 1588 FALSE
## 137 Yes No 1588 FALSE
## 138 Yes No 1701 FALSE
## 139 Yes No 1729 FALSE
## 143 Yes No 1893 FALSE
## 144 Yes No 1899 FALSE
## 147 Yes No 1928 FALSE
## 149 Yes No 1970 FALSE
## 150 Yes No 2055 FALSE
## 151 Yes No 2055 FALSE
## 152 Yes No 2082 FALSE
## 155 Yes No 2100 FALSE
## 159 Yes No 2187 FALSE
## 162 Yes No 2240 FALSE
## 163 Yes No 2240 FALSE
## 164 Yes No 2282 FALSE
## 167 Yes No 2301 FALSE
## 168 Yes No 2325 FALSE
## 170 Yes No 2353 FALSE
## 174 Yes No 2381 FALSE
## 175 Yes No 2410 FALSE
## 180 Yes No 2438 FALSE
## 181 Yes No 2442 FALSE
## 182 Yes No 2450 FALSE
## 186 Yes No 2495 FALSE
## 188 Yes No 2495 FALSE
# Per-column summary of N, the subset of rows for non-smoking mothers.
summary(N)
## under_weight smoker birth_weight smokerunderwt
## No :86 No :115 Min. :1021 Mode :logical
## Yes:29 Yes: 0 1st Qu.:2509 FALSE:115
## Median :3100 NA's :0
## Mean :3056
## 3rd Qu.:3622
## Max. :4990
find non smoker mean weight manually
# Mean birth weight over the rows where the mother did not smoke. which()
# turns the comparison into row positions and leaves out any row where it is NA.
nonsmean <- mean(birthwt$birth_weight[which(birthwt$smoker == "No")])
print(nonsmean)
## [1] 3055.696
t.test
# Two-sample t-test comparing the birth weights of the smoker group (x) with
# those of the non-smoker group (y). The settings below are written out
# explicitly: two-sided alternative, variances not assumed equal (the Welch
# test), and a 95 percent confidence interval for the difference in means.
t.test(
  x = birthwt$birth_weight[birthwt$smoker == "Yes"],
  y = birthwt$birth_weight[birthwt$smoker == "No"],
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: birthwt$birth_weight[birthwt$smoker == "Yes"] and birthwt$birth_weight[birthwt$smoker == "No"]
## t = -2.7299, df = 170.1, p-value = 0.007003
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -488.97860 -78.57486
## sample estimates:
## mean of x mean of y
## 2771.919 3055.696
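#The t-test shows a difference in mean birth weight of about 283.8 (2771.919 for smokers versus 3055.696 for non-smokers; t = -2.7299 on 170.1 degrees of freedom) and a p-value of 0.007003, which is lower than 0.05, so we reject the null hypothesis and accept the alternative hypothesis that the true difference in means is not equal to 0. In conclusion, babies of mothers who smoked during pregnancy had a significantly lower mean birth weight, which suggests there is a relationship between a mother smoking during pregnancy and her baby being underweight at birth.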