# Clear workspace
rm(list = ls())
# Set working directory
setwd("/media/hossein/DISK_IMG/Sungjin/Sungjin_Pig_Data_analysis/")
data_new <- read.table("data_new.txt", quote="\"", comment.char="")
data1 <- (data_new[,2:12])
colnames(data1) <- c("ARN","SIRE", "DAM","SSIRE","SEX","year","BD","RBATCH","MPAR","TAGE","BWT")
YM <- substr(data1$BD,1,6)
data <- cbind(YM,data1)
# required packages
install.packages(c("ggplot2", "ggpubr", "tidyverse", "broom", "AICcmodavg"))
## Installing packages into '/home/hossein/R/x86_64-pc-linux-gnu-library/4.0'
## (as 'lib' is unspecified)
# ANOVA is a statistical test for estimating how a quantitative dependent
# variable changes according to the levels of one or more categorical independent variables.
# ANOVA tests whether there is a difference in means of the groups at each level of the independent variable.
# The null hypothesis (H0) of the ANOVA is no difference in means,
# and the alternate hypothesis (Ha) is that the means are different from one another.
library(ggplot2)
library(ggpubr)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(broom)
library(AICcmodavg)
data$SEX <-factor(data$SEX)
data$MPAR <-factor(data$MPAR)
data$TAGE <-factor(data$TAGE)
data$TAGE <-factor(data$RBATCH)
data$YM <- factor(data$YM)
anova1 <- aov(BWT~SEX, data=data)
summary(anova1)
## Df Sum Sq Mean Sq F value Pr(>F)
## SEX 1 29.3 29.277 197.5 <2e-16 ***
## Residuals 17003 2519.9 0.148
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova2 <- aov(BWT~SEX+MPAR, data=data)
summary(anova2)
## Df Sum Sq Mean Sq F value Pr(>F)
## SEX 1 29.3 29.28 219.9 <2e-16 ***
## MPAR 8 257.6 32.20 241.9 <2e-16 ***
## Residuals 16995 2262.2 0.13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova3 <- aov(BWT~SEX+MPAR+YM, data=data)
summary(anova3)
## Df Sum Sq Mean Sq F value Pr(>F)
## SEX 1 29.3 29.28 228.22 <2e-16 ***
## MPAR 8 257.6 32.20 251.03 <2e-16 ***
## YM 50 88.5 1.77 13.79 <2e-16 ***
## Residuals 16945 2173.8 0.13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova4 <- aov(BWT~SEX+MPAR+RBATCH, data=data)
summary(anova4)
## Df Sum Sq Mean Sq F value Pr(>F)
## SEX 1 29.3 29.28 220.305 <2e-16 ***
## MPAR 8 257.6 32.20 242.325 <2e-16 ***
## RBATCH 56 11.2 0.20 1.504 0.0088 **
## Residuals 16939 2251.1 0.13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova5 <- aov(BWT~SEX+MPAR+RBATCH+TAGE, data=data)
summary(anova5)
## Df Sum Sq Mean Sq F value Pr(>F)
## SEX 1 29.3 29.28 220.305 <2e-16 ***
## MPAR 8 257.6 32.20 242.325 <2e-16 ***
## RBATCH 56 11.2 0.20 1.504 0.0088 **
## Residuals 16939 2251.1 0.13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova6 <- aov(BWT~SEX+MPAR+RBATCH+YM,data=data)
summary(anova6)
## Df Sum Sq Mean Sq F value Pr(>F)
## SEX 1 29.3 29.28 228.834 < 2e-16 ***
## MPAR 8 257.6 32.20 251.706 < 2e-16 ***
## RBATCH 56 11.2 0.20 1.563 0.00458 **
## YM 50 90.3 1.81 14.115 < 2e-16 ***
## Residuals 16889 2160.8 0.13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Testing the best-fit model
model.set <-list(anova1,anova2,anova3,anova4,anova5,anova6)
model.names <- c("anova1","anova2","anova3","anova4","anova5","anova6")
# best-fit model based on AIC
# The Akaike information criterion (AIC) is a good test for model fit.
# AIC calculates the information value of each model by balancing the variation explained against the number of parameters used.
# A lower number means more information explained!
aictab(model.set,modname=model.names)
## Warning in aictab.AICaov.lm(model.set, modname = model.names):
## Check model structure carefully as some models may be redundant
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## anova3 61 13400.63 0.00 1 1 -6639.09
## anova6 117 13411.62 10.99 0 1 -6587.99
## anova2 11 13978.52 577.89 0 1 -6978.25
## anova4 67 14006.68 606.05 0 1 -6936.07
## anova5 67 14006.68 606.05 0 1 -6936.07
## anova1 3 15796.48 2395.85 0 1 -7895.24
# Plot the best Model
plot(anova3)



