# Clear workspace
rm(list = ls())
# Set working directory
setwd("/media/hossein/DISK_IMG/Sungjin/Sungjin_Pig_Data_analysis/")
data_new        <- read.table("data_new.txt", quote="\"", comment.char="")
data1           <- (data_new[,2:12])
colnames(data1) <- c("ARN","SIRE", "DAM","SSIRE","SEX","year","BD","RBATCH","MPAR","TAGE","BWT")
YM              <- substr(data1$BD,1,6)
data <- cbind(YM,data1)
# required packages
 install.packages(c("ggplot2", "ggpubr", "tidyverse", "broom", "AICcmodavg"))
## Installing packages into '/home/hossein/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
# ANOVA is a statistical test for estimating how a quantitative dependent 
# variable changes according to the levels of one or more categorical independent variables. 
# ANOVA tests whether there is a difference in means of the groups at each level of the independent variable.
# The null hypothesis (H0) of the ANOVA is no difference in means, 
# and the alternate hypothesis (Ha) is that the means are different from one another.
library(ggplot2)
library(ggpubr)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ✓ purrr   0.3.4
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(broom)
library(AICcmodavg)

 data$SEX  <-factor(data$SEX)
 data$MPAR <-factor(data$MPAR)
 data$TAGE <-factor(data$TAGE)
 data$TAGE <-factor(data$RBATCH)
 data$YM   <- factor(data$YM)
 
 anova1 <- aov(BWT~SEX, data=data)
 summary(anova1)
##                Df Sum Sq Mean Sq F value Pr(>F)    
## SEX             1   29.3  29.277   197.5 <2e-16 ***
## Residuals   17003 2519.9   0.148                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 anova2 <- aov(BWT~SEX+MPAR, data=data)
 summary(anova2)
##                Df Sum Sq Mean Sq F value Pr(>F)    
## SEX             1   29.3   29.28   219.9 <2e-16 ***
## MPAR            8  257.6   32.20   241.9 <2e-16 ***
## Residuals   16995 2262.2    0.13                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 anova3 <- aov(BWT~SEX+MPAR+YM, data=data)
 summary(anova3)
##                Df Sum Sq Mean Sq F value Pr(>F)    
## SEX             1   29.3   29.28  228.22 <2e-16 ***
## MPAR            8  257.6   32.20  251.03 <2e-16 ***
## YM             50   88.5    1.77   13.79 <2e-16 ***
## Residuals   16945 2173.8    0.13                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 anova4 <- aov(BWT~SEX+MPAR+RBATCH, data=data)
 summary(anova4)
##                Df Sum Sq Mean Sq F value Pr(>F)    
## SEX             1   29.3   29.28 220.305 <2e-16 ***
## MPAR            8  257.6   32.20 242.325 <2e-16 ***
## RBATCH         56   11.2    0.20   1.504 0.0088 ** 
## Residuals   16939 2251.1    0.13                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 anova5 <- aov(BWT~SEX+MPAR+RBATCH+TAGE, data=data)
 summary(anova5) 
##                Df Sum Sq Mean Sq F value Pr(>F)    
## SEX             1   29.3   29.28 220.305 <2e-16 ***
## MPAR            8  257.6   32.20 242.325 <2e-16 ***
## RBATCH         56   11.2    0.20   1.504 0.0088 ** 
## Residuals   16939 2251.1    0.13                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 anova6 <- aov(BWT~SEX+MPAR+RBATCH+YM,data=data)
 summary(anova6)
##                Df Sum Sq Mean Sq F value  Pr(>F)    
## SEX             1   29.3   29.28 228.834 < 2e-16 ***
## MPAR            8  257.6   32.20 251.706 < 2e-16 ***
## RBATCH         56   11.2    0.20   1.563 0.00458 ** 
## YM             50   90.3    1.81  14.115 < 2e-16 ***
## Residuals   16889 2160.8    0.13                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 # Testing the best-fit model
 model.set   <-list(anova1,anova2,anova3,anova4,anova5,anova6)
 model.names <-  c("anova1","anova2","anova3","anova4","anova5","anova6")
 
 # best-fit model based on AIC
 # The Akaike information criterion (AIC) is a good test for model fit. 
 # AIC calculates the information value of each model by balancing the variation explained against the number of parameters used.
 # A lower number means more information explained!
 
 aictab(model.set,modname=model.names)
## Warning in aictab.AICaov.lm(model.set, modname = model.names): 
## Check model structure carefully as some models may be redundant
## 
## Model selection based on AICc:
## 
##          K     AICc Delta_AICc AICcWt Cum.Wt       LL
## anova3  61 13400.63       0.00      1      1 -6639.09
## anova6 117 13411.62      10.99      0      1 -6587.99
## anova2  11 13978.52     577.89      0      1 -6978.25
## anova4  67 14006.68     606.05      0      1 -6936.07
## anova5  67 14006.68     606.05      0      1 -6936.07
## anova1   3 15796.48    2395.85      0      1 -7895.24
 # Plot the best Model
 plot(anova3)