library(MASS)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the anorexia data set from MASS and take a first look at it ----
data(anorexia, package = "MASS")

# Column names
colnames(anorexia)
## [1] "Treat"  "Prewt"  "Postwt"

# First six rows
head(anorexia, n = 6L)
##   Treat Prewt Postwt
## 1  Cont  80.7   80.2
## 2  Cont  89.4   80.1
## 3  Cont  91.8   86.4
## 4  Cont  74.0   86.3
## 5  Cont  78.1   76.1
## 6  Cont  88.3   78.1

# Last six rows
tail(anorexia, n = 6L)
##    Treat Prewt Postwt
## 67    FT  82.1   95.5
## 68    FT  77.6   90.7
## 69    FT  83.5   92.5
## 70    FT  89.9   93.8
## 71    FT  86.0   91.7
## 72    FT  87.3   98.0
# Keep only the rows whose treatment arm is "FT"
ft_data <- dplyr::filter(anorexia, Treat == "FT")

# Extract the FT weights as plain numeric vectors ----
# The histograms, normality tests and paired test further down are handed
# these objects directly, so they are pulled out as atomic columns of
# ft_data (not as one-column data frames). `[[` is used for exact name
# matching.
Prewt_ft <- ft_data[["Prewt"]]
Postwt_ft <- ft_data[["Postwt"]]
# Histogram of the FT group's weights before treatment
hist(
  Prewt_ft,
  col = "pink",
  xlab = "Pre-treatment weight",
  main = "Histogram of Before Treatment"
)

# Histogram of the FT group's weights after treatment
hist(
  Postwt_ft,
  col = "lightblue",
  xlab = "After-treatment weight",
  main = "Histogram of After Treatment"
)

# Shapiro-Wilk normality test on the pre-treatment weights
Prewt_ft_result <- shapiro.test(x = Prewt_ft)
print(Prewt_ft_result)
## 
##  Shapiro-Wilk normality test
## 
## data:  Prewt_ft
## W = 0.98821, p-value = 0.9972
# Show the p-value in fixed (non-scientific) notation
format(Prewt_ft_result[["p.value"]], scientific = FALSE)
## [1] "0.9972153"
# p > 0.05: normality is not rejected (consistent with a normal distribution)

# Shapiro-Wilk normality test on the post-treatment weights
Postwt_ft_result <- shapiro.test(x = Postwt_ft)
print(Postwt_ft_result)
## 
##  Shapiro-Wilk normality test
## 
## data:  Postwt_ft
## W = 0.83928, p-value = 0.007391
# Show the p-value in fixed (non-scientific) notation
format(Postwt_ft_result[["p.value"]], scientific = FALSE)
## [1] "0.007390658"
# p < 0.05: normality is rejected (not a normal distribution)
# Paired Wilcoxon signed rank test: pre- vs post-treatment weights (FT group).
# exact = FALSE requests the normal approximation instead of an exact p-value.
# NOTE(review): for a paired comparison the distributional assumption concerns
# the pre/post differences rather than each variable separately - consider
# checking those as well.
wilcox_test_result <- wilcox.test(
  x = Prewt_ft,
  y = Postwt_ft,
  paired = TRUE,
  exact = FALSE
)
print(wilcox_test_result)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  Prewt_ft and Postwt_ft
## V = 11, p-value = 0.002091
## alternative hypothesis: true location shift is not equal to 0
# There is a statistically significant difference between the weights before and after treatment (p = 0.002).
# Normal Q-Q plots ----

# Pre-treatment weights, with a reference line in orange
qqnorm(y = Prewt_ft)
qqline(y = Prewt_ft, col = "orange")

# Post-treatment weights, with a reference line in red
qqnorm(y = Postwt_ft)
qqline(y = Postwt_ft, col = "red")