R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

## Install Packages
set.seed(1)
install.packages("ggplot2")
library(ggplot2)
install.packages("data.table")
library(data.table)
install.packages("bit64")
library(bit64)
install.packages("corrplot")
library(corrplot)
install.packages("ggthemes")
library(ggthemes)
install.packages("car")
library(car) # for avPlots
install.packages("tidyverse")
library(tidyverse)

# Import the three participants' BMI logs and combine them into one data set.
jt_BMI <- read.csv("/Users/JLu/Desktop/510 Feyzi/510 Final Project/Jintian_BMI.csv")
nk_BMI <- read.csv("/Users/JLu/Desktop/510 Feyzi/510 Final Project/Nikita_BMI.csv")
ps_BMI <- read.csv("/Users/JLu/Desktop/510 Feyzi/510 Final Project/Pushya_BMI.csv")

# Give Nikita's and Pushya's tables the same column names as Jintian's so all
# three data frames share identical names before being stacked with rbind().
colnames(nk_BMI) <- colnames(jt_BMI)
colnames(ps_BMI) <- colnames(jt_BMI)
prime_BMI <- rbind(jt_BMI, nk_BMI, ps_BMI)

# Rename the combined columns positionally; only prime_BMI is renamed here,
# the per-person data frames keep the names read from Jintian's CSV.
colnames(prime_BMI) <- c(
  "Date", "weight1", "wakeup", "weight2", "bedtime", "steps", "workout_h",
  "stress_level", "calories_intake", "name", "sleep_h", "Weight_avgkg",
  "Height_m", "BMI"
)

# Quick structural check of the sleep-hours column
str(prime_BMI[["sleep_h"]])
##  num [1:201] 7 7.5 9.5 7 7 7.25 6.25 8 6 5.9 ...
# Import the validation data and derive a weight column in pounds from the
# kilogram column (2.20462 lb per kg).
validation <- read.csv("/Users/JLu/Desktop/510 Feyzi/510 Final Project/data2.csv")
validation$weight_lb <- validation$weight_kg * 2.20462

# Initial analysis: column-by-column summary of the combined primary data
summary(prime_BMI)
##       Date        weight1          wakeup      weight2         bedtime  
##  3/29/18:  3   Min.   :111.0   6:35   :29   Min.   :111.8   23:00  :36  
##  3/30/18:  3   1st Qu.:113.5   7:00   :27   1st Qu.:114.4   1:00   :32  
##  3/31/18:  3   Median :139.8   8:00   :25   Median :140.4   23:30  :11  
##  4/1/18 :  3   Mean   :133.8   8:30   :19   Mean   :134.7   0:05   : 9  
##  4/10/18:  3   3rd Qu.:147.5   11:00  :12   3rd Qu.:148.0   0:30   : 9  
##  4/11/18:  3   Max.   :151.0   7:30   :11   Max.   :152.0   0:00   : 8  
##  (Other):183                   (Other):78                   (Other):96  
##      steps         workout_h       stress_level    calories_intake name   
##  Min.   :   97   Min.   : 0.000   Min.   : 1.000   Min.   :1300    JT:67  
##  1st Qu.: 3326   1st Qu.: 0.000   1st Qu.: 4.000   1st Qu.:1755    NK:67  
##  Median : 5003   Median : 0.000   Median : 5.000   Median :2220    PS:67  
##  Mean   : 5843   Mean   : 1.975   Mean   : 5.453   Mean   :2324           
##  3rd Qu.: 7803   3rd Qu.: 1.000   3rd Qu.: 7.000   3rd Qu.:2825           
##  Max.   :27618   Max.   :60.000   Max.   :10.000   Max.   :3850           
##                                                    NA's   :134            
##     sleep_h        Weight_avgkg      Height_m          BMI       
##  Min.   : 4.130   Min.   :50.92   Min.   :1.600   Min.   :19.89  
##  1st Qu.: 6.380   1st Qu.:51.69   1st Qu.:1.600   1st Qu.:20.19  
##  Median : 7.500   Median :63.50   Median :1.600   Median :24.67  
##  Mean   : 7.546   Mean   :60.89   Mean   :1.617   Mean   :23.25  
##  3rd Qu.: 8.750   3rd Qu.:67.02   3rd Qu.:1.651   3rd Qu.:24.86  
##  Max.   :12.000   Max.   :68.72   Max.   :1.651   Max.   :25.21  
## 

Including Plots

# All three people pooled: scatter plot of BMI against sleep hours, overlaid
# with a loess smoother and its 80% confidence band.
library(ggplot2)
ggplot(data = prime_BMI, mapping = aes(sleep_h, BMI)) +
  geom_point(colour = "darkred", shape = 18, size = 2) +
  geom_smooth(method = "loess", level = 0.8, se = TRUE, fullrange = FALSE)

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# For all three people pooled: Pearson correlation between sleep hours and BMI,
# followed by the matching significance test.
# `method` names the one coefficient that is computed. Supplying the whole
# choice vector c("pearson", "kendall", "spearman") resolves to its first
# element, so only the Pearson coefficient was ever produced.
# NOTE(review): this pools three individuals whose BMI levels differ, so the
# coefficient may mostly reflect between-person differences -- confirm against
# the per-person correlations before interpreting it.
cor(prime_BMI$sleep_h, prime_BMI$BMI, method = "pearson")
## [1] 0.6215561
cor.test(prime_BMI$sleep_h, prime_BMI$BMI, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  prime_BMI$sleep_h and prime_BMI$BMI
## t = 11.193, df = 199, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5286346 0.6997576
## sample estimates:
##       cor 
## 0.6215561
# Jintian only: scatter plot of BMI against sleep hours with a loess smoother
# and its 80% confidence band.
library(ggplot2)
ggplot(data = jt_BMI, mapping = aes(sleep_h, BMI)) +
  geom_point(colour = "lightgreen", shape = 18, size = 2) +
  geom_smooth(method = "loess", level = 0.8, se = TRUE, fullrange = FALSE)

# Nikita only: scatter plot of BMI against sleep hours with a loess smoother
# and its 80% confidence band.
ggplot(data = nk_BMI, mapping = aes(sleep_h, BMI)) +
  geom_point(colour = "darkgreen", shape = 18, size = 2) +
  geom_smooth(method = "loess", level = 0.8, se = TRUE, fullrange = FALSE)

# Pushya only: scatter plot of BMI against sleep hours with a loess smoother
# and its 80% confidence band.
ggplot(data = ps_BMI, mapping = aes(sleep_h, BMI)) +
  geom_point(colour = "darkgreen", shape = 18, size = 2) +
  geom_smooth(method = "loess", level = 0.8, se = TRUE, fullrange = FALSE)

# For Jintian's data: Pearson correlation between sleep hours and BMI,
# followed by the matching significance test.
# `method` names the one coefficient that is computed. Supplying the whole
# choice vector c("pearson", "kendall", "spearman") resolves to its first
# element, so only the Pearson coefficient was ever produced.
cor(jt_BMI$sleep_h, jt_BMI$BMI, method = "pearson")
## [1] 0.2476572
cor.test(jt_BMI$sleep_h, jt_BMI$BMI, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  jt_BMI$sleep_h and jt_BMI$BMI
## t = 2.0609, df = 65, p-value = 0.04332
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.007919745 0.460472610
## sample estimates:
##       cor 
## 0.2476572
# For Nikita's data: Pearson correlation between sleep hours and BMI,
# followed by the matching significance test.
# `method` names the one coefficient that is computed. Supplying the whole
# choice vector c("pearson", "kendall", "spearman") resolves to its first
# element, so only the Pearson coefficient was ever produced.
cor(nk_BMI$sleep_h, nk_BMI$BMI, method = "pearson")
## [1] 0.03322589
cor.test(nk_BMI$sleep_h, nk_BMI$BMI, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  nk_BMI$sleep_h and nk_BMI$BMI
## t = 0.26802, df = 65, p-value = 0.7895
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2086480  0.2712695
## sample estimates:
##        cor 
## 0.03322589
# For Pushya's data: Pearson correlation between sleep hours and BMI,
# followed by the matching significance test.
# `method` names the one coefficient that is computed. Supplying the whole
# choice vector c("pearson", "kendall", "spearman") resolves to its first
# element, so only the Pearson coefficient was ever produced.
cor(ps_BMI$sleep_h, ps_BMI$BMI, method = "pearson")
## [1] 0.05271192
cor.test(ps_BMI$sleep_h, ps_BMI$BMI, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  ps_BMI$sleep_h and ps_BMI$BMI
## t = 0.42557, df = 65, p-value = 0.6718
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1899012  0.2892580
## sample estimates:
##        cor 
## 0.05271192
# Notched box plots: when the notches of two boxes do not overlap, that is
# taken as 'strong evidence' that their medians differ.
# Jintian: distribution of BMI for each distinct sleep-hours value.
# NOTE(review): each distinct sleep_h value becomes its own box; this call
# warns that some notches fall outside the hinges, so the notch comparison
# is unreliable here.
boxplot(
  BMI ~ sleep_h,
  data = jt_BMI,
  main = "Jintian's BMI in different Sleep Hr",
  xlab = "Hours of Sleep",
  col = c("gold", "darkgreen"),  # two colours, recycled across the boxes
  notch = TRUE
)
## Warning in bxp(structure(list(stats = structure(c(20.23449619,
## 20.23449619, : some notches went outside hinges ('box'): maybe set
## notch=FALSE

# Nikita: notched box plot of BMI for each distinct sleep-hours value.
# NOTE(review): each distinct sleep_h value becomes its own box; this call
# warns that some notches fall outside the hinges, so the notch comparison
# is unreliable here.
boxplot(
  BMI ~ sleep_h,
  data = nk_BMI,
  main = "Nikita's BMI in different Sleep Hr",
  xlab = "Hours of Sleep",
  col = c("yellow", "red"),  # two colours, recycled across the boxes
  notch = TRUE
)
## Warning in bxp(structure(list(stats = structure(c(24.58666365,
## 24.58666365, : some notches went outside hinges ('box'): maybe set
## notch=FALSE

# Pushya: notched box plot of BMI for each distinct sleep-hours value.
# NOTE(review): each distinct sleep_h value becomes its own box; this call
# warns that some notches fall outside the hinges, so the notch comparison
# is unreliable here.
boxplot(
  BMI ~ sleep_h,
  data = ps_BMI,
  main = "Pushya's BMI in different Sleep Hr",
  xlab = "Hours of Sleep",
  col = c("blue", "pink"),  # two colours, recycled across the boxes
  notch = TRUE
)
## Warning in bxp(structure(list(stats = structure(c(24.80586223,
## 24.81472147, : some notches went outside hinges ('box'): maybe set
## notch=FALSE