library(readxl)
library(ggpubr)
## Loading required package: ggplot2
# Read the Excel workbook into `DatasetC`; every later step uses its `inches`
# and `pounds` columns.
# NOTE(review): absolute, machine-specific path -- confirm it exists wherever
# this script is run.
DatasetC <- read_excel(
  path = "C:/Users/varun/Downloads/DatasetC.xlsx"
)
# Descriptive statistics for height (inches): mean, then standard deviation.
# `[[ ]]` extracts the column by its exact name.
mean(DatasetC[["inches"]])
## [1] 69.27122
sd(DatasetC[["inches"]])
## [1] 2.738448
# Descriptive statistics for weight (pounds): mean, then standard deviation.
mean(DatasetC[["pounds"]])
## [1] 195.7736
sd(DatasetC[["pounds"]])
## [1] 29.0096
# Histogram of `inches`, used to judge the shape of its distribution by eye.
# `xlab` is set explicitly: hist() otherwise labels the x-axis with the
# deparsed argument expression (`DatasetC$inches`), which is inconsistent with
# the plain "inches"/"pounds" axis labels used on the scatter plot.
hist(DatasetC$inches,
     main = "inches",
     xlab = "inches",
     breaks = 20,          # requested number of bins (hist() treats it as a suggestion)
     col = "lightblue",
     border = "white",     # white bar outlines
     cex.main = 1,         # title, axis and label text all at scale 1
     cex.axis = 1,
     cex.lab = 1)

# Histogram of `pounds`, drawn with the same settings in a different fill
# colour so the two variables are easy to tell apart.
hist(DatasetC$pounds,
     main = "pounds",
     xlab = "pounds",
     breaks = 20,
     col = "lightcoral",
     border = "white",
     cex.main = 1,
     cex.axis = 1,
     cex.lab = 1)

#The variable “inches” appears normally distributed. The data looks symmetrical (most data is in the middle). The histogram also has a bell-curve shape.

#The variable “pounds” does not appear to be normally distributed. The data is positively skewed (most data is on the left).

# Shapiro-Wilk normality test, run separately on each variable.
# The `##` lines record the printed result of each call; the `data:` line in
# that output echoes the argument expression exactly as written here.
shapiro.test(DatasetC$inches) 
## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetC$inches
## W = 0.99388, p-value = 0.9349
# Same test for `pounds`.
shapiro.test(DatasetC$pounds)
## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetC$pounds
## W = 0.97289, p-value = 0.03691

#The Shapiro-Wilk p-value for the inches normality test is greater than .05 (.93), so we fail to reject normality and the data is treated as normal.

#The Shapiro-Wilk p-value for the pounds normality test is less than .05 (.04), so the data is NOT normal.

# Spearman rank correlation between `inches` and `pounds`
# (method = "spearman" selects the rank-based test).
# The `##` lines record the printed result: test statistic S, p-value, and the
# sample estimate rho.
cor.test(DatasetC$inches, DatasetC$pounds, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  DatasetC$inches and DatasetC$pounds
## S = 166000, p-value = 0.9693
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.00390039

#The Spearman Correlation test was selected because pounds failed the Shapiro-Wilk normality test.

#The p-value (probability value) is 0.9693, which is above .05. This means the results are not statistically significant, so we fail to reject the null hypothesis.

#The rho-value is 0.00390039.

#The correlation is nominally positive, which would mean that as Height (inches) increases, Weight (pounds) increases. However, the value is negligible, so this does not apply here: there is no relationship between inches and pounds.

#The correlation value falls within ± 0.00 to 0.09, which means there is no relationship.

# Scatter plot of `pounds` (y-axis) against `inches` (x-axis) from DatasetC,
# with a regression line overlaid (add = "reg.line") and explicit axis labels.
ggscatter(data = DatasetC, x = "inches", y = "pounds",
          xlab = "inches", ylab = "pounds",
          add = "reg.line")

#The line of best fit is horizontal. This means there is no relationship between inches and pounds.

#The dots are scattered randomly around the line, which indicates no relationship between the two variables.

#The dots form a curved pattern, so the relationship is non-linear. Since the relationship is non-linear, a Spearman Correlation should be used.

#There is possibly one outlier.

library(rmarkdown)