Research Project Stats Analysis

Author

N1306063

Data Exploration

Install packages

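# install.packages("readxl")      # To read Excel files
# install.packages("ggplot2")     # For visual checks
# install.packages("car")         # NOTE(review): not loaded in this script - confirm it is needed
# install.packages("naniar")      # For the missing-data plot (vis_miss)
# install.packages("dplyr")       # For filter() and %>% in the age subsets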

Load packages

library(readxl)
library(ggplot2)

Load CSV data file

# Read the study data from a CSV file into the data frame `data`.
# NOTE(review): this is an absolute, user-specific iCloud Drive path, so the
# script only runs on a machine where this exact file exists - confirm before
# sharing or re-running elsewhere.
data <- read.csv("~/Library/Mobile Documents/com~apple~CloudDocs/Documents/MSc EPHW/MSc Research Project/Data new.csv")

Structure & summaries

# View column names and types
  str(data)
'data.frame':   18 obs. of  27 variables:
 $ Horse             : chr  "Banngah" "Barbie" "Bling" "Bob" ...
 $ Age               : num  18 17 12 18 25 20 17 18 14 11 ...
 $ Sex               : chr  "G" "M" "M" "G" ...
 $ chew.min.B        : num  68 71 64 75 66 66 72 72 63 86 ...
 $ X2mm.B            : num  0.33 0.345 0.259 0.281 0.255 ...
 $ X1mm.B            : num  0.28 0.245 0.315 0.36 0.357 ...
 $ X0.5mm.B          : num  0.08 0.0545 0.0741 0.0674 0.0612 0.0755 0.0816 0.069 0.0976 0.0842 ...
 $ X0.25mm.B         : num  0.03 0.0364 0.0463 0.0225 0.0204 0.0472 0.0612 0.069 0.0732 0.0316 ...
 $ X0.08mm.B         : num  0.07 0.1091 0.1019 0.1011 0.0306 ...
 $ longest.B         : num  15.2 23 25 37.2 19.9 ...
 $ QA.1              : num  11.24 14.43 8.98 11.71 6.76 ...
 $ QB.1              : num  7.02 11.66 4.96 8.72 7.6 ...
 $ QC.1              : num  13.9 7.75 8.57 4.75 16.01 ...
 $ QD.1              : num  4.36 8.83 5.9 13.02 12.49 ...
 $ shavings.present.B: chr  "N" "Y" "Y" "Y" ...
 $ chew.min.A        : num  68 68 69 78 66 65 64 78 67 75 ...
 $ X2mm.A            : num  0.317 0.284 0.206 0.21 0.277 ...
 $ X1mm.A            : num  0.317 0.284 0.422 0.298 0.25 ...
 $ X0.5mm.A          : num  0.0792 0.1009 0.0784 0.0403 0.0536 ...
 $ X0.25mm.A         : num  0.0396 0.0459 0.0588 0.0645 0.0268 0.0283 0.0412 0.0342 0.0795 0.0254 ...
 $ X0.08mm.A         : num  0.0693 0.1193 0.1275 0.1855 0.0893 ...
 $ longest.A         : num  33.3 20.2 17.3 19.6 21.5 ...
 $ QA.2              : num  11.18 13.89 11.72 5.58 13.25 ...
 $ QB.2              : num  6.93 8.27 9.62 9.15 6.01 4.53 3.05 7.57 6.62 7.79 ...
 $ QC.2              : num  7.44 7.44 11.59 10.49 8.77 ...
 $ QD.2              : num  13.63 10.52 9.84 7.77 7.55 ...
 $ shavings.present.A: chr  "Y" "Y" "Y" "Y" ...
# Quick look at first few rows
head(data)
    Horse Age Sex chew.min.B X2mm.B X1mm.B X0.5mm.B X0.25mm.B X0.08mm.B
1 Banngah  18   G         68 0.3300 0.2800   0.0800    0.0300    0.0700
2  Barbie  17   M         71 0.3455 0.2455   0.0545    0.0364    0.1091
3   Bling  12   M         64 0.2593 0.3148   0.0741    0.0463    0.1019
4     Bob  18   G         75 0.2809 0.3596   0.0674    0.0225    0.1011
5   Bobby  25   G         66 0.2551 0.3571   0.0612    0.0204    0.0306
6  Daniel  20   G         66 0.2830 0.3208   0.0755    0.0472    0.0755
  longest.B  QA.1  QB.1  QC.1  QD.1 shavings.present.B chew.min.A X2mm.A X1mm.A
1     15.24 11.24  7.02 13.90  4.36                  N         68 0.3168 0.3168
2     22.98 14.43 11.66  7.75  8.83                  Y         68 0.2844 0.2844
3     25.04  8.98  4.96  8.57  5.90                  Y         69 0.2059 0.4216
4     37.17 11.71  8.72  4.75 13.02                  Y         78 0.2097 0.2984
5     19.90  6.76  7.60 16.01 12.49                  Y         66 0.2768 0.2500
6     22.39  5.94  9.48  7.43 22.39                  Y         65 0.2358 0.3491
  X0.5mm.A X0.25mm.A X0.08mm.A longest.A  QA.2 QB.2  QC.2  QD.2
1   0.0792    0.0396    0.0693     33.34 11.18 6.93  7.44 13.63
2   0.1009    0.0459    0.1193     20.25 13.89 8.27  7.44 10.52
3   0.0784    0.0588    0.1275     17.31 11.72 9.62 11.59  9.84
4   0.0403    0.0645    0.1855     19.58  5.58 9.15 10.49  7.77
5   0.0536    0.0268    0.0893     21.48 13.25 6.01  8.77  7.55
6   0.1038    0.0283    0.1038     24.67  8.26 4.53 18.62  9.95
  shavings.present.A
1                  Y
2                  Y
3                  Y
4                  Y
5                  N
6                  Y
# Summary statistics
summary(data)
    Horse                Age            Sex              chew.min.B   
 Length:18          Min.   : 9.00   Length:18          Min.   :62.00  
 Class :character   1st Qu.:14.00   Class :character   1st Qu.:64.50  
 Mode  :character   Median :17.50   Mode  :character   Median :70.00  
                    Mean   :16.94                      Mean   :70.06  
                    3rd Qu.:20.00                      3rd Qu.:72.75  
                    Max.   :25.00                      Max.   :86.00  
     X2mm.B           X1mm.B          X0.5mm.B         X0.25mm.B      
 Min.   :0.1917   Min.   :0.2455   Min.   :0.02730   Min.   :0.02040  
 1st Qu.:0.2554   1st Qu.:0.3013   1st Qu.:0.06275   1st Qu.:0.03272  
 Median :0.2819   Median :0.3291   Median :0.07205   Median :0.04675  
 Mean   :0.2827   Mean   :0.3315   Mean   :0.07123   Mean   :0.04718  
 3rd Qu.:0.3206   3rd Qu.:0.3590   3rd Qu.:0.08140   3rd Qu.:0.06105  
 Max.   :0.3636   Max.   :0.4182   Max.   :0.10830   Max.   :0.07500  
   X0.08mm.B         longest.B          QA.1             QB.1       
 Min.   :0.03060   Min.   :15.24   Min.   : 5.940   Min.   : 4.960  
 1st Qu.:0.07333   1st Qu.:20.03   1st Qu.: 6.830   1st Qu.: 7.133  
 Median :0.08940   Median :22.96   Median : 8.390   Median : 7.745  
 Mean   :0.08824   Mean   :24.21   Mean   : 9.714   Mean   : 8.214  
 3rd Qu.:0.10197   3rd Qu.:27.98   3rd Qu.:11.592   3rd Qu.: 9.290  
 Max.   :0.12730   Max.   :37.17   Max.   :19.920   Max.   :11.690  
      QC.1             QD.1        shavings.present.B   chew.min.A   
 Min.   : 4.750   Min.   : 3.380   Length:18          Min.   :63.00  
 1st Qu.: 6.657   1st Qu.: 5.893   Class :character   1st Qu.:66.25  
 Median : 8.145   Median : 8.920   Mode  :character   Median :69.00  
 Mean   : 8.943   Mean   : 9.996                      Mean   :69.83  
 3rd Qu.: 9.717   3rd Qu.:12.328                      3rd Qu.:72.75  
 Max.   :16.010   Max.   :22.390                      Max.   :78.00  
     X2mm.A           X1mm.A          X0.5mm.A         X0.25mm.A      
 Min.   :0.1705   Min.   :0.2500   Min.   :0.04030   Min.   :0.02540  
 1st Qu.:0.2069   1st Qu.:0.2911   1st Qu.:0.07028   1st Qu.:0.03068  
 Median :0.2529   Median :0.3238   Median :0.08040   Median :0.03810  
 Mean   :0.2543   Mean   :0.3277   Mean   :0.07910   Mean   :0.04174  
 3rd Qu.:0.3041   3rd Qu.:0.3471   3rd Qu.:0.09638   3rd Qu.:0.04972  
 Max.   :0.3390   Max.   :0.4216   Max.   :0.10380   Max.   :0.07950  
   X0.08mm.A         longest.A          QA.2             QB.2       
 Min.   :0.06800   Min.   :16.10   Min.   : 3.340   Min.   : 3.050  
 1st Qu.:0.09248   1st Qu.:18.44   1st Qu.: 5.905   1st Qu.: 6.162  
 Median :0.11150   Median :21.79   Median : 8.235   Median : 7.725  
 Mean   :0.11108   Mean   :24.53   Mean   : 8.740   Mean   : 8.052  
 3rd Qu.:0.12260   3rd Qu.:27.53   3rd Qu.:11.367   3rd Qu.: 9.502  
 Max.   :0.18550   Max.   :45.05   Max.   :13.890   Max.   :15.000  
      QC.2             QD.2        shavings.present.A
 Min.   : 5.330   Min.   : 4.680   Length:18         
 1st Qu.: 7.440   1st Qu.: 6.825   Class :character  
 Median : 8.235   Median : 7.900   Mode  :character  
 Mean   : 9.595   Mean   : 8.404                     
 3rd Qu.:11.315   3rd Qu.: 9.950                     
 Max.   :18.670   Max.   :13.630                     

Missing values

# Count missing values per column
colSums(is.na(data))
             Horse                Age                Sex         chew.min.B 
                 0                  0                  0                  0 
            X2mm.B             X1mm.B           X0.5mm.B          X0.25mm.B 
                 0                  0                  0                  0 
         X0.08mm.B          longest.B               QA.1               QB.1 
                 0                  0                  0                  0 
              QC.1               QD.1 shavings.present.B         chew.min.A 
                 0                  0                  0                  0 
            X2mm.A             X1mm.A           X0.5mm.A          X0.25mm.A 
                 0                  0                  0                  0 
         X0.08mm.A          longest.A               QA.2               QB.2 
                 0                  0                  0                  0 
              QC.2               QD.2 shavings.present.A 
                 0                  0                  0 
# Visualize missing data
# install.packages("naniar")
library(naniar)
# Plot a missingness overview of the whole data frame
vis_miss(data)

# Show the class of every column (presumably to spot the character columns
# that are converted to factors afterwards)
sapply(data, class)
             Horse                Age                Sex         chew.min.B 
       "character"          "numeric"        "character"          "numeric" 
            X2mm.B             X1mm.B           X0.5mm.B          X0.25mm.B 
         "numeric"          "numeric"          "numeric"          "numeric" 
         X0.08mm.B          longest.B               QA.1               QB.1 
         "numeric"          "numeric"          "numeric"          "numeric" 
              QC.1               QD.1 shavings.present.B         chew.min.A 
         "numeric"          "numeric"        "character"          "numeric" 
            X2mm.A             X1mm.A           X0.5mm.A          X0.25mm.A 
         "numeric"          "numeric"          "numeric"          "numeric" 
         X0.08mm.A          longest.A               QA.2               QB.2 
         "numeric"          "numeric"          "numeric"          "numeric" 
              QC.2               QD.2 shavings.present.A 
         "numeric"          "numeric"        "character" 
# Turn the categorical (character) columns into factors in a single pass.
# local() keeps the helper vector out of the global environment.
data <- local({
  factor_cols <- c(
    "Sex",
    "Horse",
    "shavings.present.B",
    "shavings.present.A"
  )
  data[factor_cols] <- lapply(data[factor_cols], as.factor)
  data
})

Outliers

# Visual outlier check: draw one boxplot per variable, each with its own title.
# invisible() suppresses the list of boxplot statistics that Map() returns.
invisible(Map(
  function(values, title) boxplot(values, main = title),
  list(data$Age, data$chew.min.B, data$chew.min.A, data$X1mm.B),
  c(
    "Age outliers",
    "Chew/min B outliers",
    "Chew/min A outliers",
    "1mm B outliers"
  )
))

Normality check

shapiro.test(data$Age)

    Shapiro-Wilk normality test

data:  data$Age
W = 0.98126, p-value = 0.962
shapiro.test(data$chew.min.B)

    Shapiro-Wilk normality test

data:  data$chew.min.B
W = 0.92399, p-value = 0.1521
shapiro.test(data$X2mm.B)

    Shapiro-Wilk normality test

data:  data$X2mm.B
W = 0.96569, p-value = 0.714
shapiro.test(data$X1mm.B)

    Shapiro-Wilk normality test

data:  data$X1mm.B
W = 0.97987, p-value = 0.9487
shapiro.test(data$X0.5mm.B) 

    Shapiro-Wilk normality test

data:  data$X0.5mm.B
W = 0.96658, p-value = 0.7314
shapiro.test(data$X0.25mm.B)

    Shapiro-Wilk normality test

data:  data$X0.25mm.B
W = 0.94309, p-value = 0.327
shapiro.test(data$X0.08mm.B)

    Shapiro-Wilk normality test

data:  data$X0.08mm.B
W = 0.9478, p-value = 0.3916
shapiro.test(data$longest.B)

    Shapiro-Wilk normality test

data:  data$longest.B
W = 0.93712, p-value = 0.2586
shapiro.test(data$QA.1) # Not normal

    Shapiro-Wilk normality test

data:  data$QA.1
W = 0.83151, p-value = 0.004387
shapiro.test(data$QB.1) 

    Shapiro-Wilk normality test

data:  data$QB.1
W = 0.94606, p-value = 0.3666
shapiro.test(data$QC.1) # Not normal

    Shapiro-Wilk normality test

data:  data$QC.1
W = 0.89236, p-value = 0.04237
shapiro.test(data$QD.1)

    Shapiro-Wilk normality test

data:  data$QD.1
W = 0.91078, p-value = 0.08879
shapiro.test(data$chew.min.A)

    Shapiro-Wilk normality test

data:  data$chew.min.A
W = 0.94147, p-value = 0.307
shapiro.test(data$X2mm.A)

    Shapiro-Wilk normality test

data:  data$X2mm.A
W = 0.93801, p-value = 0.2678
shapiro.test(data$X1mm.A)

    Shapiro-Wilk normality test

data:  data$X1mm.A
W = 0.95726, p-value = 0.5497
shapiro.test(data$X0.5mm.A) 

    Shapiro-Wilk normality test

data:  data$X0.5mm.A
W = 0.91568, p-value = 0.1084
shapiro.test(data$X0.25mm.A) # Not normal at alpha = 0.05 (p < 0.05; judge on the unrounded p-value)

    Shapiro-Wilk normality test

data:  data$X0.25mm.A
W = 0.89482, p-value = 0.04671
shapiro.test(data$X0.08mm.A) 

    Shapiro-Wilk normality test

data:  data$X0.08mm.A
W = 0.94548, p-value = 0.3586
shapiro.test(data$longest.A) # Not normal

    Shapiro-Wilk normality test

data:  data$longest.A
W = 0.85861, p-value = 0.01161
shapiro.test(data$QA.2) 

    Shapiro-Wilk normality test

data:  data$QA.2
W = 0.9547, p-value = 0.5034
shapiro.test(data$QB.2)

    Shapiro-Wilk normality test

data:  data$QB.2
W = 0.9655, p-value = 0.7102
shapiro.test(data$QC.2) # Not normal

    Shapiro-Wilk normality test

data:  data$QC.2
W = 0.83384, p-value = 0.004759
shapiro.test(data$QD.2)

    Shapiro-Wilk normality test

data:  data$QD.2
W = 0.95269, p-value = 0.4687

Dealing with non-normal data

| Goal | Parametric test | Non-parametric alternative |
|---|---|---|
| Compare 2 independent groups | t.test() | wilcox.test() (Mann–Whitney U test) |
| Compare 2 related groups | paired t-test (t.test(paired = TRUE)) | wilcox.test(paired = TRUE) (Wilcoxon signed-rank test) |
| Compare >2 groups (independent) | one-way ANOVA (aov() / anova()) | kruskal.test() |
| Compare >2 related groups | repeated measures ANOVA | Friedman test (friedman.test()) |
| Correlation | cor(method = "pearson") | cor(method = "spearman") |

Older horses age factor (> 17)

table(data$Age)

 9 11 12 14 15 17 18 20 21 22 25 
 1  1  1  3  1  2  3  3  1  1  1 
# dplyr supplies filter() and the %>% pipe used to build the age-group subsets.
# older_horses is created with filter() further down, so no separate base-R
# subset() assignment is needed here.
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Subset of horses with Age > 17.
older_horses <- data %>% filter(Age > 17)

# View() opens a data viewer and needs an interactive session; guard it so
# rendering the document or running the script with Rscript does not fail or
# pop up a window at this point.
if (interactive()) {
  View(older_horses)
}

shapiro.test(older_horses$Age)

    Shapiro-Wilk normality test

data:  older_horses$Age
W = 0.86985, p-value = 0.1225
shapiro.test(older_horses$chew.min.B)

    Shapiro-Wilk normality test

data:  older_horses$chew.min.B
W = 0.95819, p-value = 0.7792
shapiro.test(older_horses$X2mm.B)

    Shapiro-Wilk normality test

data:  older_horses$X2mm.B
W = 0.94872, p-value = 0.6761
shapiro.test(older_horses$X1mm.B)

    Shapiro-Wilk normality test

data:  older_horses$X1mm.B
W = 0.97639, p-value = 0.9432
shapiro.test(older_horses$X0.5mm.B) 

    Shapiro-Wilk normality test

data:  older_horses$X0.5mm.B
W = 0.90388, p-value = 0.2754
shapiro.test(older_horses$X0.25mm.B)

    Shapiro-Wilk normality test

data:  older_horses$X0.25mm.B
W = 0.91534, p-value = 0.3551
shapiro.test(older_horses$X0.08mm.B)

    Shapiro-Wilk normality test

data:  older_horses$X0.08mm.B
W = 0.91642, p-value = 0.3635
shapiro.test(older_horses$longest.B)

    Shapiro-Wilk normality test

data:  older_horses$longest.B
W = 0.86939, p-value = 0.1211
shapiro.test(older_horses$QA.1) # Normality not rejected in this subgroup (p > 0.05)

    Shapiro-Wilk normality test

data:  older_horses$QA.1
W = 0.89649, p-value = 0.2324
shapiro.test(older_horses$QB.1) 

    Shapiro-Wilk normality test

data:  older_horses$QB.1
W = 0.91649, p-value = 0.364
shapiro.test(older_horses$QC.1) 

    Shapiro-Wilk normality test

data:  older_horses$QC.1
W = 0.85315, p-value = 0.08071
shapiro.test(older_horses$QD.1) # Normality not rejected in this subgroup (p > 0.05)

    Shapiro-Wilk normality test

data:  older_horses$QD.1
W = 0.87954, p-value = 0.1553
shapiro.test(older_horses$chew.min.A)

    Shapiro-Wilk normality test

data:  older_horses$chew.min.A
W = 0.91229, p-value = 0.3322
shapiro.test(older_horses$X2mm.A)

    Shapiro-Wilk normality test

data:  older_horses$X2mm.A
W = 0.95306, p-value = 0.7236
shapiro.test(older_horses$X1mm.A)

    Shapiro-Wilk normality test

data:  older_horses$X1mm.A
W = 0.99023, p-value = 0.9964
shapiro.test(older_horses$X0.5mm.A) 

    Shapiro-Wilk normality test

data:  older_horses$X0.5mm.A
W = 0.90716, p-value = 0.2965
shapiro.test(older_horses$X0.25mm.A) 

    Shapiro-Wilk normality test

data:  older_horses$X0.25mm.A
W = 0.91403, p-value = 0.3451
shapiro.test(older_horses$X0.08mm.A) 

    Shapiro-Wilk normality test

data:  older_horses$X0.08mm.A
W = 0.93168, p-value = 0.4975
shapiro.test(older_horses$longest.A) # Normality not rejected in this subgroup (p > 0.05)

    Shapiro-Wilk normality test

data:  older_horses$longest.A
W = 0.85734, p-value = 0.08969
shapiro.test(older_horses$QA.2) 

    Shapiro-Wilk normality test

data:  older_horses$QA.2
W = 0.9309, p-value = 0.49
shapiro.test(older_horses$QB.2)

    Shapiro-Wilk normality test

data:  older_horses$QB.2
W = 0.96213, p-value = 0.8203
shapiro.test(older_horses$QC.2) # Normality not rejected in this subgroup (p > 0.05)

    Shapiro-Wilk normality test

data:  older_horses$QC.2
W = 0.89959, p-value = 0.2496
shapiro.test(older_horses$QD.2)

    Shapiro-Wilk normality test

data:  older_horses$QD.2
W = 0.8701, p-value = 0.1233

Younger horses age factor (<= 17)

# Subset of horses with Age <= 17, built once with dplyr::filter(); a
# preceding base-R subset() assignment would only be overwritten by it.
library(dplyr)
younger_horses <- data %>% filter(Age <= 17)

# View() opens a data viewer and needs an interactive session; guard it so
# rendering the document or running the script with Rscript does not fail or
# pop up a window at this point.
if (interactive()) {
  View(younger_horses)
}

shapiro.test(younger_horses$Age)

    Shapiro-Wilk normality test

data:  younger_horses$Age
W = 0.93849, p-value = 0.5661
shapiro.test(younger_horses$chew.min.B)

    Shapiro-Wilk normality test

data:  younger_horses$chew.min.B
W = 0.86687, p-value = 0.1138
shapiro.test(younger_horses$X2mm.B)

    Shapiro-Wilk normality test

data:  younger_horses$X2mm.B
W = 0.93025, p-value = 0.4837
shapiro.test(younger_horses$X1mm.B)

    Shapiro-Wilk normality test

data:  younger_horses$X1mm.B
W = 0.95345, p-value = 0.7279
shapiro.test(younger_horses$X0.5mm.B) 

    Shapiro-Wilk normality test

data:  younger_horses$X0.5mm.B
W = 0.95806, p-value = 0.7778
shapiro.test(younger_horses$X0.25mm.B)

    Shapiro-Wilk normality test

data:  younger_horses$X0.25mm.B
W = 0.90043, p-value = 0.2545
shapiro.test(younger_horses$X0.08mm.B)

    Shapiro-Wilk normality test

data:  younger_horses$X0.08mm.B
W = 0.89198, p-value = 0.2091
shapiro.test(younger_horses$longest.B)

    Shapiro-Wilk normality test

data:  younger_horses$longest.B
W = 0.9899, p-value = 0.996
shapiro.test(younger_horses$QA.1) # Not normal (p is about 0.005, well below 0.05)

    Shapiro-Wilk normality test

data:  younger_horses$QA.1
W = 0.74688, p-value = 0.004988
shapiro.test(younger_horses$QB.1) 

    Shapiro-Wilk normality test

data:  younger_horses$QB.1
W = 0.89386, p-value = 0.2185
shapiro.test(younger_horses$QC.1) 

    Shapiro-Wilk normality test

data:  younger_horses$QC.1
W = 0.94953, p-value = 0.6849
shapiro.test(younger_horses$QD.1) # Not normal

    Shapiro-Wilk normality test

data:  younger_horses$QD.1
W = 0.81495, p-value = 0.03021
shapiro.test(younger_horses$chew.min.A)

    Shapiro-Wilk normality test

data:  younger_horses$chew.min.A
W = 0.97794, p-value = 0.9529
shapiro.test(younger_horses$X2mm.A)

    Shapiro-Wilk normality test

data:  younger_horses$X2mm.A
W = 0.92091, p-value = 0.3998
shapiro.test(younger_horses$X1mm.A)

    Shapiro-Wilk normality test

data:  younger_horses$X1mm.A
W = 0.88376, p-value = 0.1719
shapiro.test(younger_horses$X0.5mm.A) 

    Shapiro-Wilk normality test

data:  younger_horses$X0.5mm.A
W = 0.90584, p-value = 0.2878
shapiro.test(younger_horses$X0.25mm.A) 

    Shapiro-Wilk normality test

data:  younger_horses$X0.25mm.A
W = 0.87347, p-value = 0.1339
shapiro.test(younger_horses$X0.08mm.A) 

    Shapiro-Wilk normality test

data:  younger_horses$X0.08mm.A
W = 0.83943, p-value = 0.05691
shapiro.test(younger_horses$longest.A) # Not normal

    Shapiro-Wilk normality test

data:  younger_horses$longest.A
W = 0.73884, p-value = 0.004023
shapiro.test(younger_horses$QA.2) 

    Shapiro-Wilk normality test

data:  younger_horses$QA.2
W = 0.95227, p-value = 0.715
shapiro.test(younger_horses$QB.2)

    Shapiro-Wilk normality test

data:  younger_horses$QB.2
W = 0.97127, p-value = 0.9054
shapiro.test(younger_horses$QC.2) # Not normal

    Shapiro-Wilk normality test

data:  younger_horses$QC.2
W = 0.7771, p-value = 0.01115
shapiro.test(younger_horses$QD.2)

    Shapiro-Wilk normality test

data:  younger_horses$QD.2
W = 0.90644, p-value = 0.2917