R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2) 
library(rmarkdown) 

library(readxl) 
bdims <- read_csv("C:/Users/thyagu/rmit/applied analytics/assign3/bdims.csv")
## Parsed with column specification:
## cols(
##   .default = col_double()
## )
## See spec(...) for full column specifications.
class(bdims$wgt)
## [1] "numeric"
class(bdims$hgt)
## [1] "numeric"
sum(is.na(bdims$hgt))
## [1] 0
sum(is.na(bdims$wgt))  
## [1] 0
#boxplot
boxplot(bdims$che.di,main = "Chest diameter")

#boxplot
boxplot(bdims$hgt,main = "Height") 

bdims_summary <- bdims %>%
  summarize(N = n(), r = cor(che.di, hgt), mean_hgt = mean(hgt),
            sd_hgt = sd(hgt),Max_hgt=max(hgt),IQR_hgt = IQR(hgt),mean_chestdi = mean(che.di), sd_chestdi = sd(che.di),Max_chestdi=max(che.di),IQR_chestdi = IQR(che.di))
bdims_summary 
## # A tibble: 1 x 10
##       N     r mean_hgt sd_hgt Max_hgt IQR_hgt mean_chestdi sd_chestdi
##   <int> <dbl>    <dbl>  <dbl>   <dbl>   <dbl>        <dbl>      <dbl>
## 1   507 0.627     171.   9.41    198.      14         28.0       2.74
## # ... with 2 more variables: Max_chestdi <dbl>, IQR_chestdi <dbl>
#
benchmark_chest <- quantile(bdims$che.di,probs=.75,na.rm=TRUE) + 1.5*IQR(bdims$che.di)
benchmark_chest
##  75% 
## 36.4
#
benchmark_hgt <- quantile(bdims$hgt,probs=.75,na.rm=TRUE) + 1.5*IQR(bdims$hgt)
benchmark_hgt
##   75% 
## 198.8
# bdims_summary %>%
#   mutate(slope = r*sd_chestdi/sd_hgt, 
#          intercept = mean_chestdi - slope*mean_hgt) 

#Formulas
sum_x <- sum(bdims$hgt)
sum_y <- sum(bdims$che.di) 
sum_x_sq <- sum(bdims$hgt^2)
sum_y_sq <- sum(bdims$che.di^2)
sum_xy <- sum(bdims$hgt*bdims$che.di)
n <- length(bdims$che.di) #Sample size
Lxx <- sum_x_sq-((sum_x^2)/n)
Lyy <- sum_y_sq-((sum_y^2)/n)
Lxy = sum_xy - (((sum_x)*(sum_y))/n)
b = Lxy/Lxx 
a = mean(bdims$che.di - b*mean(bdims$hgt))
print(a)
## [1] -3.294657
print(b)
## [1] 0.1827027
plot(che.di ~ hgt, data = bdims, xlab = "Height", ylab = "Chest diameter")
abline(a = a, b = b, col= "red")

#lm model

heightchestmodel <- lm(che.di ~ hgt, data = bdims)
heightchestmodel %>% summary() 
## 
## Call:
## lm(formula = che.di ~ hgt, data = bdims)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.3102 -1.4326 -0.0696  1.4168  6.8929 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -3.2947     1.7319  -1.902   0.0577 .  
## hgt           0.1827     0.0101  18.082   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.138 on 505 degrees of freedom
## Multiple R-squared:  0.393,  Adjusted R-squared:  0.3918 
## F-statistic:   327 on 1 and 505 DF,  p-value: < 2.2e-16
#p-value of the observed F statistic 
pf(q = 327,1,505,lower.tail = FALSE)
## [1] 1.00343e-56
#RegSS and ResSS values 
heightchestmodel %>% anova()
## Analysis of Variance Table
## 
## Response: che.di
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## hgt         1 1494.7 1494.73  326.95 < 2.2e-16 ***
## Residuals 505 2308.7    4.57                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#coefficient table to report sample estimates for constant a and slope b 

heightchestmodel %>% summary() %>% coef()
##               Estimate Std. Error   t value     Pr(>|t|)
## (Intercept) -3.2946566  1.7318756 -1.902363 5.769227e-02
## hgt          0.1827027  0.0101042 18.081859 1.017694e-56
#confint() function to capture a and b intervals 
heightchestmodel %>% confint() 
##                  2.5 %    97.5 %
## (Intercept) -6.6972252 0.1079121
## hgt          0.1628512 0.2025541
#for B slope
2*pt(q = 18.08,df = 505, lower.tail=FALSE)
## [1] 1.038739e-56
#plot to summarise the relationship
plot(che.di ~ hgt, data = bdims, xlab = "height", ylab = "chest diameter")
abline(heightchestmodel, col = "red") 

#Assumptions plot

plot(heightchestmodel)

#Correlation
cor(bdims$che.di,bdims$hgt)
## [1] 0.6268931
#install.packages("Hmisc")
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
#full correlation analysis between hgt and Che_di in R by installing the Hmisc library and using the rcorr() function.
bivariate<-as.matrix(dplyr::select(bdims, che.di,hgt)) #Create a matrix of the variables to be correlated
rcorr(bivariate, type = "pearson") 
##        che.di  hgt
## che.di   1.00 0.63
## hgt      0.63 1.00
## 
## n= 507 
## 
## 
## P
##        che.di hgt
## che.di         0 
## hgt     0
#install.packages("psychometric")
library(psychometric)
## Loading required package: multilevel
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Attaching package: 'psychometric'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
r=cor(bdims$che.di,bdims$hgt)
CIr(r = r, n = 507, level = .95)
## [1] 0.5709813 0.6770164

```

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.