This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
#Install Packages
set.seed(1)
install.packages("ggplot2")
library(ggplot2)
install.packages("data.table")
library(data.table)
install.packages("bit64")
library(bit64)
install.packages("corrplot")
library(corrplot)
install.packages("ggthemes")
library(ggthemes)
install.packages("car")
library(car) # for avPlots
install.packages("tidyverse")
library(tidyverse)
#Import primary and combine data as needed
# Every input file sits in the same project folder, so the folder is written
# once and each path is assembled from it with file.path(). The resulting
# path strings are identical to the previously hard-coded ones.
data_dir <- "/Users/JLu/Desktop/510 Feyzi/510 Final Project"
jt_BMI <- read.csv(file.path(data_dir, "Jintian_BMI.csv"))
nk_BMI <- read.csv(file.path(data_dir, "Nikita_BMI.csv"))
ps_BMI <- read.csv(file.path(data_dir, "Pushya_BMI.csv"))
# rbind() lines data frame columns up by name, so Nikita's and Pushya's tables
# take Jintian's headers before stacking. The renaming is positional, so fail
# early with a clear error if the files do not have the same number of columns.
# NOTE(review): this assumes the three files also share the same column ORDER;
# confirm against the CSV headers.
stopifnot(
  ncol(nk_BMI) == ncol(jt_BMI),
  ncol(ps_BMI) == ncol(jt_BMI)
)
names(nk_BMI) <- names(jt_BMI)
names(ps_BMI) <- names(jt_BMI)
prime_BMI <- rbind(jt_BMI, nk_BMI, ps_BMI)
# Descriptive headers for the combined table only; jt_BMI, nk_BMI and ps_BMI
# keep the headers read from Jintian's file.
prime_col_names <- c(
  "Date", "weight1", "wakeup", "weight2", "bedtime", "steps", "workout_h",
  "stress_level", "calories_intake", "name", "sleep_h", "Weight_avgkg",
  "Height_m", "BMI"
)
stopifnot(ncol(prime_BMI) == length(prime_col_names))
names(prime_BMI) <- prime_col_names
str(prime_BMI$sleep_h)
## num [1:201] 7 7.5 9.5 7 7 7.25 6.25 8 6 5.9 ...
validation <- read.csv(file.path(data_dir, "data2.csv"))
# Add the weight in pounds, converted from kilograms (factor 2.20462).
validation$weight_lb <- 2.20462 * validation$weight_kg
#Initial Analysis
# Print per-column summary statistics of the combined data frame.
summary(prime_BMI)
## Date weight1 wakeup weight2 bedtime
## 3/29/18: 3 Min. :111.0 6:35 :29 Min. :111.8 23:00 :36
## 3/30/18: 3 1st Qu.:113.5 7:00 :27 1st Qu.:114.4 1:00 :32
## 3/31/18: 3 Median :139.8 8:00 :25 Median :140.4 23:30 :11
## 4/1/18 : 3 Mean :133.8 8:30 :19 Mean :134.7 0:05 : 9
## 4/10/18: 3 3rd Qu.:147.5 11:00 :12 3rd Qu.:148.0 0:30 : 9
## 4/11/18: 3 Max. :151.0 7:30 :11 Max. :152.0 0:00 : 8
## (Other):183 (Other):78 (Other):96
## steps workout_h stress_level calories_intake name
## Min. : 97 Min. : 0.000 Min. : 1.000 Min. :1300 JT:67
## 1st Qu.: 3326 1st Qu.: 0.000 1st Qu.: 4.000 1st Qu.:1755 NK:67
## Median : 5003 Median : 0.000 Median : 5.000 Median :2220 PS:67
## Mean : 5843 Mean : 1.975 Mean : 5.453 Mean :2324
## 3rd Qu.: 7803 3rd Qu.: 1.000 3rd Qu.: 7.000 3rd Qu.:2825
## Max. :27618 Max. :60.000 Max. :10.000 Max. :3850
## NA's :134
## sleep_h Weight_avgkg Height_m BMI
## Min. : 4.130 Min. :50.92 Min. :1.600 Min. :19.89
## 1st Qu.: 6.380 1st Qu.:51.69 1st Qu.:1.600 1st Qu.:20.19
## Median : 7.500 Median :63.50 Median :1.600 Median :24.67
## Mean : 7.546 Mean :60.89 Mean :1.617 Mean :23.25
## 3rd Qu.: 8.750 3rd Qu.:67.02 3rd Qu.:1.651 3rd Qu.:24.86
## Max. :12.000 Max. :68.72 Max. :1.651 Max. :25.21
##
# Pooled view (all three people): scatter plot of BMI against hours of sleep,
# overlaid with a loess smoother and its 80% confidence band.
library(ggplot2)
ggplot(data = prime_BMI, mapping = aes(x = sleep_h, y = BMI)) +
  geom_point(colour = "darkred", shape = 18, size = 2) +
  geom_smooth(level = 0.80, fullrange = FALSE, se = TRUE, method = "loess")
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
#For all three people, compute the correlation coefficient between sleep hours and BMI
# `method` expects a single method name. The three-name vector passed before
# resolved to its first entry only (the recorded output is headed "Pearson's
# product-moment correlation"), so Pearson is now requested explicitly.
# NOTE(review): this pools rows from three people. The per-person correlations
# further down are much smaller (0.25, 0.03, 0.05), so the pooled value may
# mostly reflect differences between people rather than an effect of sleep.
cor(prime_BMI$sleep_h, prime_BMI$BMI, method = "pearson")
## [1] 0.6215561
cor.test(prime_BMI$sleep_h, prime_BMI$BMI, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: prime_BMI$sleep_h and prime_BMI$BMI
## t = 11.193, df = 199, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5286346 0.6997576
## sample estimates:
## cor
## 0.6215561
library(ggplot2)

# Build one person's sleep-vs-BMI scatter plot with a loess smoother and its
# 80% confidence band.
#   person_data: data frame with `sleep_h` and `BMI` columns
#   point_color: colour name used for the points
# Returns the ggplot object; called at top level it is printed automatically.
plot_sleep_vs_bmi <- function(person_data, point_color) {
  ggplot(person_data, aes(x = sleep_h, y = BMI)) +
    geom_point(size = 2, shape = 18, color = point_color) +
    geom_smooth(method = "loess", se = TRUE, fullrange = FALSE, level = 0.80)
}

#Scatter Plot For Jintian
plot_sleep_vs_bmi(jt_BMI, "lightgreen")
#Scatter Plot For Nikita
plot_sleep_vs_bmi(nk_BMI, "darkgreen")
#Scatter Plot For Pushya
plot_sleep_vs_bmi(ps_BMI, "darkgreen")
#For Jintian's data, compute the correlation coefficient between sleep hours and BMI
# `method` expects a single method name. The three-name vector passed before
# resolved to its first entry only, so Pearson is now requested explicitly;
# the results are unchanged.
cor(jt_BMI$sleep_h, jt_BMI$BMI, method = "pearson")
## [1] 0.2476572
cor.test(jt_BMI$sleep_h, jt_BMI$BMI, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: jt_BMI$sleep_h and jt_BMI$BMI
## t = 2.0609, df = 65, p-value = 0.04332
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.007919745 0.460472610
## sample estimates:
## cor
## 0.2476572
#For Nikita's data, compute the correlation coefficient between sleep hours and BMI
# `method` expects a single method name. The three-name vector passed before
# resolved to its first entry only, so Pearson is now requested explicitly;
# the results are unchanged.
cor(nk_BMI$sleep_h, nk_BMI$BMI, method = "pearson")
## [1] 0.03322589
cor.test(nk_BMI$sleep_h, nk_BMI$BMI, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: nk_BMI$sleep_h and nk_BMI$BMI
## t = 0.26802, df = 65, p-value = 0.7895
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2086480 0.2712695
## sample estimates:
## cor
## 0.03322589
#For Pushya's data, compute the correlation coefficient between sleep hours and BMI
# `method` expects a single method name. The three-name vector passed before
# resolved to its first entry only, so Pearson is now requested explicitly;
# the results are unchanged.
cor(ps_BMI$sleep_h, ps_BMI$BMI, method = "pearson")
## [1] 0.05271192
cor.test(ps_BMI$sleep_h, ps_BMI$BMI, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: ps_BMI$sleep_h and ps_BMI$BMI
## t = 0.42557, df = 65, p-value = 0.6718
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1899012 0.2892580
## sample estimates:
## cor
## 0.05271192
# Notched box plots: when the notches of two boxes do not overlap, that is
# 'strong evidence' that their medians differ.
# Jintian: BMI grouped by hours of sleep, one box per distinct sleep_h value;
# the two fill colours are recycled across the boxes.
# NOTE(review): because every distinct sleep_h value forms its own group, the
# groups can be very small; the recorded warning about notches going outside
# the hinges suggests notch=FALSE.
boxplot(
  BMI ~ sleep_h,
  data = jt_BMI,
  main = "Jintian's BMI in different Sleep Hr",
  xlab = "Hours of Sleep",
  col = c("gold", "darkgreen"),
  notch = TRUE
)
## Warning in bxp(structure(list(stats = structure(c(20.23449619,
## 20.23449619, : some notches went outside hinges ('box'): maybe set
## notch=FALSE
# Nikita: notched box plot of BMI grouped by hours of sleep, one box per
# distinct sleep_h value; the two fill colours are recycled across the boxes.
# NOTE(review): because every distinct sleep_h value forms its own group, the
# groups can be very small; the recorded warning about notches going outside
# the hinges suggests notch=FALSE.
boxplot(
  BMI ~ sleep_h,
  data = nk_BMI,
  main = "Nikita's BMI in different Sleep Hr",
  xlab = "Hours of Sleep",
  col = c("yellow", "red"),
  notch = TRUE
)
## Warning in bxp(structure(list(stats = structure(c(24.58666365,
## 24.58666365, : some notches went outside hinges ('box'): maybe set
## notch=FALSE
# Pushya: notched box plot of BMI grouped by hours of sleep, one box per
# distinct sleep_h value; the two fill colours are recycled across the boxes.
# NOTE(review): because every distinct sleep_h value forms its own group, the
# groups can be very small; the recorded warning about notches going outside
# the hinges suggests notch=FALSE.
boxplot(
  BMI ~ sleep_h,
  data = ps_BMI,
  main = "Pushya's BMI in different Sleep Hr",
  xlab = "Hours of Sleep",
  col = c("blue", "pink"),
  notch = TRUE
)
## Warning in bxp(structure(list(stats = structure(c(24.80586223,
## 24.81472147, : some notches went outside hinges ('box'): maybe set
## notch=FALSE