# install.packages("readxl") # To read Excel files
# install.packages("ggplot2") # For visual checks
# install.packages("car")

# Research Project Stats Analysis
# Data Exploration ----

# Install packages ----
# One-off step: uncomment and run the install.packages() lines at the top.

# Load packages ----
library(readxl)
library(ggplot2)

# Load csv data file ----
# NOTE(review): this path is specific to one machine. The file is a CSV read
# with base read.csv(), so readxl is not needed for this step.
data <- read.csv("~/Library/Mobile Documents/com~apple~CloudDocs/Documents/MSc EPHW/MSc Research Project/Data new.csv")

# Structure & summaries ----
# View column names and types
str(data)
'data.frame': 18 obs. of 27 variables:
$ Horse : chr "Banngah" "Barbie" "Bling" "Bob" ...
$ Age : num 18 17 12 18 25 20 17 18 14 11 ...
$ Sex : chr "G" "M" "M" "G" ...
$ chew.min.B : num 68 71 64 75 66 66 72 72 63 86 ...
$ X2mm.B : num 0.33 0.345 0.259 0.281 0.255 ...
$ X1mm.B : num 0.28 0.245 0.315 0.36 0.357 ...
$ X0.5mm.B : num 0.08 0.0545 0.0741 0.0674 0.0612 0.0755 0.0816 0.069 0.0976 0.0842 ...
$ X0.25mm.B : num 0.03 0.0364 0.0463 0.0225 0.0204 0.0472 0.0612 0.069 0.0732 0.0316 ...
$ X0.08mm.B : num 0.07 0.1091 0.1019 0.1011 0.0306 ...
$ longest.B : num 15.2 23 25 37.2 19.9 ...
$ QA.1 : num 11.24 14.43 8.98 11.71 6.76 ...
$ QB.1 : num 7.02 11.66 4.96 8.72 7.6 ...
$ QC.1 : num 13.9 7.75 8.57 4.75 16.01 ...
$ QD.1 : num 4.36 8.83 5.9 13.02 12.49 ...
$ shavings.present.B: chr "N" "Y" "Y" "Y" ...
$ chew.min.A : num 68 68 69 78 66 65 64 78 67 75 ...
$ X2mm.A : num 0.317 0.284 0.206 0.21 0.277 ...
$ X1mm.A : num 0.317 0.284 0.422 0.298 0.25 ...
$ X0.5mm.A : num 0.0792 0.1009 0.0784 0.0403 0.0536 ...
$ X0.25mm.A : num 0.0396 0.0459 0.0588 0.0645 0.0268 0.0283 0.0412 0.0342 0.0795 0.0254 ...
$ X0.08mm.A : num 0.0693 0.1193 0.1275 0.1855 0.0893 ...
$ longest.A : num 33.3 20.2 17.3 19.6 21.5 ...
$ QA.2 : num 11.18 13.89 11.72 5.58 13.25 ...
$ QB.2 : num 6.93 8.27 9.62 9.15 6.01 4.53 3.05 7.57 6.62 7.79 ...
$ QC.2 : num 7.44 7.44 11.59 10.49 8.77 ...
$ QD.2 : num 13.63 10.52 9.84 7.77 7.55 ...
$ shavings.present.A: chr "Y" "Y" "Y" "Y" ...
# Quick look at first few rows
head(data)
Horse Age Sex chew.min.B X2mm.B X1mm.B X0.5mm.B X0.25mm.B X0.08mm.B
1 Banngah 18 G 68 0.3300 0.2800 0.0800 0.0300 0.0700
2 Barbie 17 M 71 0.3455 0.2455 0.0545 0.0364 0.1091
3 Bling 12 M 64 0.2593 0.3148 0.0741 0.0463 0.1019
4 Bob 18 G 75 0.2809 0.3596 0.0674 0.0225 0.1011
5 Bobby 25 G 66 0.2551 0.3571 0.0612 0.0204 0.0306
6 Daniel 20 G 66 0.2830 0.3208 0.0755 0.0472 0.0755
longest.B QA.1 QB.1 QC.1 QD.1 shavings.present.B chew.min.A X2mm.A X1mm.A
1 15.24 11.24 7.02 13.90 4.36 N 68 0.3168 0.3168
2 22.98 14.43 11.66 7.75 8.83 Y 68 0.2844 0.2844
3 25.04 8.98 4.96 8.57 5.90 Y 69 0.2059 0.4216
4 37.17 11.71 8.72 4.75 13.02 Y 78 0.2097 0.2984
5 19.90 6.76 7.60 16.01 12.49 Y 66 0.2768 0.2500
6 22.39 5.94 9.48 7.43 22.39 Y 65 0.2358 0.3491
X0.5mm.A X0.25mm.A X0.08mm.A longest.A QA.2 QB.2 QC.2 QD.2
1 0.0792 0.0396 0.0693 33.34 11.18 6.93 7.44 13.63
2 0.1009 0.0459 0.1193 20.25 13.89 8.27 7.44 10.52
3 0.0784 0.0588 0.1275 17.31 11.72 9.62 11.59 9.84
4 0.0403 0.0645 0.1855 19.58 5.58 9.15 10.49 7.77
5 0.0536 0.0268 0.0893 21.48 13.25 6.01 8.77 7.55
6 0.1038 0.0283 0.1038 24.67 8.26 4.53 18.62 9.95
shavings.present.A
1 Y
2 Y
3 Y
4 Y
5 N
6 Y
# Summary statistics
summary(data)
Horse Age Sex chew.min.B
Length:18 Min. : 9.00 Length:18 Min. :62.00
Class :character 1st Qu.:14.00 Class :character 1st Qu.:64.50
Mode :character Median :17.50 Mode :character Median :70.00
Mean :16.94 Mean :70.06
3rd Qu.:20.00 3rd Qu.:72.75
Max. :25.00 Max. :86.00
X2mm.B X1mm.B X0.5mm.B X0.25mm.B
Min. :0.1917 Min. :0.2455 Min. :0.02730 Min. :0.02040
1st Qu.:0.2554 1st Qu.:0.3013 1st Qu.:0.06275 1st Qu.:0.03272
Median :0.2819 Median :0.3291 Median :0.07205 Median :0.04675
Mean :0.2827 Mean :0.3315 Mean :0.07123 Mean :0.04718
3rd Qu.:0.3206 3rd Qu.:0.3590 3rd Qu.:0.08140 3rd Qu.:0.06105
Max. :0.3636 Max. :0.4182 Max. :0.10830 Max. :0.07500
X0.08mm.B longest.B QA.1 QB.1
Min. :0.03060 Min. :15.24 Min. : 5.940 Min. : 4.960
1st Qu.:0.07333 1st Qu.:20.03 1st Qu.: 6.830 1st Qu.: 7.133
Median :0.08940 Median :22.96 Median : 8.390 Median : 7.745
Mean :0.08824 Mean :24.21 Mean : 9.714 Mean : 8.214
3rd Qu.:0.10197 3rd Qu.:27.98 3rd Qu.:11.592 3rd Qu.: 9.290
Max. :0.12730 Max. :37.17 Max. :19.920 Max. :11.690
QC.1 QD.1 shavings.present.B chew.min.A
Min. : 4.750 Min. : 3.380 Length:18 Min. :63.00
1st Qu.: 6.657 1st Qu.: 5.893 Class :character 1st Qu.:66.25
Median : 8.145 Median : 8.920 Mode :character Median :69.00
Mean : 8.943 Mean : 9.996 Mean :69.83
3rd Qu.: 9.717 3rd Qu.:12.328 3rd Qu.:72.75
Max. :16.010 Max. :22.390 Max. :78.00
X2mm.A X1mm.A X0.5mm.A X0.25mm.A
Min. :0.1705 Min. :0.2500 Min. :0.04030 Min. :0.02540
1st Qu.:0.2069 1st Qu.:0.2911 1st Qu.:0.07028 1st Qu.:0.03068
Median :0.2529 Median :0.3238 Median :0.08040 Median :0.03810
Mean :0.2543 Mean :0.3277 Mean :0.07910 Mean :0.04174
3rd Qu.:0.3041 3rd Qu.:0.3471 3rd Qu.:0.09638 3rd Qu.:0.04972
Max. :0.3390 Max. :0.4216 Max. :0.10380 Max. :0.07950
X0.08mm.A longest.A QA.2 QB.2
Min. :0.06800 Min. :16.10 Min. : 3.340 Min. : 3.050
1st Qu.:0.09248 1st Qu.:18.44 1st Qu.: 5.905 1st Qu.: 6.162
Median :0.11150 Median :21.79 Median : 8.235 Median : 7.725
Mean :0.11108 Mean :24.53 Mean : 8.740 Mean : 8.052
3rd Qu.:0.12260 3rd Qu.:27.53 3rd Qu.:11.367 3rd Qu.: 9.502
Max. :0.18550 Max. :45.05 Max. :13.890 Max. :15.000
QC.2 QD.2 shavings.present.A
Min. : 5.330 Min. : 4.680 Length:18
1st Qu.: 7.440 1st Qu.: 6.825 Class :character
Median : 8.235 Median : 7.900 Mode :character
Mean : 9.595 Mean : 8.404
3rd Qu.:11.315 3rd Qu.: 9.950
Max. :18.670 Max. :13.630
Missing values
# Count missing values per column
colSums(is.na(data))
Horse Age Sex chew.min.B
0 0 0 0
X2mm.B X1mm.B X0.5mm.B X0.25mm.B
0 0 0 0
X0.08mm.B longest.B QA.1 QB.1
0 0 0 0
QC.1 QD.1 shavings.present.B chew.min.A
0 0 0 0
X2mm.A X1mm.A X0.5mm.A X0.25mm.A
0 0 0 0
X0.08mm.A longest.A QA.2 QB.2
0 0 0 0
QC.2 QD.2 shavings.present.A
0 0 0
# Visualize missing data
# install.packages("naniar")
library(naniar)
vis_miss(data)
sapply(data, class)
Horse Age Sex chew.min.B
"character" "numeric" "character" "numeric"
X2mm.B X1mm.B X0.5mm.B X0.25mm.B
"numeric" "numeric" "numeric" "numeric"
X0.08mm.B longest.B QA.1 QB.1
"numeric" "numeric" "numeric" "numeric"
QC.1 QD.1 shavings.present.B chew.min.A
"numeric" "numeric" "character" "numeric"
X2mm.A X1mm.A X0.5mm.A X0.25mm.A
"numeric" "numeric" "numeric" "numeric"
X0.08mm.A longest.A QA.2 QB.2
"numeric" "numeric" "numeric" "numeric"
QC.2 QD.2 shavings.present.A
"numeric" "numeric" "character"
# Convert the character columns to factors in one pass instead of repeating
# the same assignment for each column.
factor_cols <- c("Sex", "Horse", "shavings.present.B", "shavings.present.A")
data[factor_cols] <- lapply(data[factor_cols], as.factor)

# Outliers ----
# Boxplots as a visual check for outliers (one plot per variable).
boxplot(data$Age, main = "Age outliers")
boxplot(data$chew.min.B, main = "Chew/min B outliers")
boxplot(data$chew.min.A, main = "Chew/min A outliers")
boxplot(data$X1mm.B, main = "1mm B outliers")

# Normality check ----
shapiro.test(data$Age)
Shapiro-Wilk normality test
data: data$Age
W = 0.98126, p-value = 0.962
shapiro.test(data$chew.min.B)
Shapiro-Wilk normality test
data: data$chew.min.B
W = 0.92399, p-value = 0.1521
shapiro.test(data$X2mm.B)
Shapiro-Wilk normality test
data: data$X2mm.B
W = 0.96569, p-value = 0.714
shapiro.test(data$X1mm.B)
Shapiro-Wilk normality test
data: data$X1mm.B
W = 0.97987, p-value = 0.9487
shapiro.test(data$X0.5mm.B)
Shapiro-Wilk normality test
data: data$X0.5mm.B
W = 0.96658, p-value = 0.7314
shapiro.test(data$X0.25mm.B)
Shapiro-Wilk normality test
data: data$X0.25mm.B
W = 0.94309, p-value = 0.327
shapiro.test(data$X0.08mm.B)
Shapiro-Wilk normality test
data: data$X0.08mm.B
W = 0.9478, p-value = 0.3916
shapiro.test(data$longest.B)
Shapiro-Wilk normality test
data: data$longest.B
W = 0.93712, p-value = 0.2586
shapiro.test(data$QA.1) # Not normal
Shapiro-Wilk normality test
data: data$QA.1
W = 0.83151, p-value = 0.004387
shapiro.test(data$QB.1)
Shapiro-Wilk normality test
data: data$QB.1
W = 0.94606, p-value = 0.3666
shapiro.test(data$QC.1) # Not normal
Shapiro-Wilk normality test
data: data$QC.1
W = 0.89236, p-value = 0.04237
shapiro.test(data$QD.1)
Shapiro-Wilk normality test
data: data$QD.1
W = 0.91078, p-value = 0.08879
shapiro.test(data$chew.min.A)
Shapiro-Wilk normality test
data: data$chew.min.A
W = 0.94147, p-value = 0.307
shapiro.test(data$X2mm.A)
Shapiro-Wilk normality test
data: data$X2mm.A
W = 0.93801, p-value = 0.2678
shapiro.test(data$X1mm.A)
Shapiro-Wilk normality test
data: data$X1mm.A
W = 0.95726, p-value = 0.5497
shapiro.test(data$X0.5mm.A)
Shapiro-Wilk normality test
data: data$X0.5mm.A
W = 0.91568, p-value = 0.1084
shapiro.test(data$X0.25mm.A) # Not normal at alpha = 0.05: p = 0.04671 is below 0.05 (compare the unrounded p-value; do not round it up first)
Shapiro-Wilk normality test
data: data$X0.25mm.A
W = 0.89482, p-value = 0.04671
shapiro.test(data$X0.08mm.A)
Shapiro-Wilk normality test
data: data$X0.08mm.A
W = 0.94548, p-value = 0.3586
shapiro.test(data$longest.A) # Not normal
Shapiro-Wilk normality test
data: data$longest.A
W = 0.85861, p-value = 0.01161
shapiro.test(data$QA.2)
Shapiro-Wilk normality test
data: data$QA.2
W = 0.9547, p-value = 0.5034
shapiro.test(data$QB.2)
Shapiro-Wilk normality test
data: data$QB.2
W = 0.9655, p-value = 0.7102
shapiro.test(data$QC.2) # Not normal
Shapiro-Wilk normality test
data: data$QC.2
W = 0.83384, p-value = 0.004759
shapiro.test(data$QD.2)
Shapiro-Wilk normality test
data: data$QD.2
W = 0.95269, p-value = 0.4687
Dealing with non-normal data
| Goal | Parametric Test | Non-Parametric Alternative |
|---|---|---|
| Compare 2 independent groups | t.test() | wilcox.test() (Mann–Whitney U test) |
| Compare 2 related groups | paired t-test | wilcox.test(paired = TRUE) |
| Compare >2 groups (independent) | anova() | kruskal.test() |
| Compare >2 related groups | repeated measures ANOVA | Friedman test (friedman.test()) |
| Correlation | cor(method = "pearson") | cor(method = "spearman") |
Older horse age factor (>17)
table(data$Age)
9 11 12 14 15 17 18 20 21 22 25
1 1 1 3 1 2 3 3 1 1 1
older_horses <- subset(data, Age > 17)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Keep horses older than 17 years. This overwrites the result of the
# equivalent subset() call above, so only one of the two is needed.
older_horses <- data %>% filter(Age > 17)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(older_horses)
}
shapiro.test(older_horses$Age)
Shapiro-Wilk normality test
data: older_horses$Age
W = 0.86985, p-value = 0.1225
shapiro.test(older_horses$chew.min.B)
Shapiro-Wilk normality test
data: older_horses$chew.min.B
W = 0.95819, p-value = 0.7792
shapiro.test(older_horses$X2mm.B)
Shapiro-Wilk normality test
data: older_horses$X2mm.B
W = 0.94872, p-value = 0.6761
shapiro.test(older_horses$X1mm.B)
Shapiro-Wilk normality test
data: older_horses$X1mm.B
W = 0.97639, p-value = 0.9432
shapiro.test(older_horses$X0.5mm.B)
Shapiro-Wilk normality test
data: older_horses$X0.5mm.B
W = 0.90388, p-value = 0.2754
shapiro.test(older_horses$X0.25mm.B)
Shapiro-Wilk normality test
data: older_horses$X0.25mm.B
W = 0.91534, p-value = 0.3551
shapiro.test(older_horses$X0.08mm.B)
Shapiro-Wilk normality test
data: older_horses$X0.08mm.B
W = 0.91642, p-value = 0.3635
shapiro.test(older_horses$longest.B)
Shapiro-Wilk normality test
data: older_horses$longest.B
W = 0.86939, p-value = 0.1211
shapiro.test(older_horses$QA.1) # p = 0.2324 > 0.05: normality is not rejected in the older-horse subset
Shapiro-Wilk normality test
data: older_horses$QA.1
W = 0.89649, p-value = 0.2324
shapiro.test(older_horses$QB.1)
Shapiro-Wilk normality test
data: older_horses$QB.1
W = 0.91649, p-value = 0.364
shapiro.test(older_horses$QC.1)
Shapiro-Wilk normality test
data: older_horses$QC.1
W = 0.85315, p-value = 0.08071
shapiro.test(older_horses$QD.1) # p = 0.1553 > 0.05: normality is not rejected in the older-horse subset
Shapiro-Wilk normality test
data: older_horses$QD.1
W = 0.87954, p-value = 0.1553
shapiro.test(older_horses$chew.min.A)
Shapiro-Wilk normality test
data: older_horses$chew.min.A
W = 0.91229, p-value = 0.3322
shapiro.test(older_horses$X2mm.A)
Shapiro-Wilk normality test
data: older_horses$X2mm.A
W = 0.95306, p-value = 0.7236
shapiro.test(older_horses$X1mm.A)
Shapiro-Wilk normality test
data: older_horses$X1mm.A
W = 0.99023, p-value = 0.9964
shapiro.test(older_horses$X0.5mm.A)
Shapiro-Wilk normality test
data: older_horses$X0.5mm.A
W = 0.90716, p-value = 0.2965
shapiro.test(older_horses$X0.25mm.A)
Shapiro-Wilk normality test
data: older_horses$X0.25mm.A
W = 0.91403, p-value = 0.3451
shapiro.test(older_horses$X0.08mm.A)
Shapiro-Wilk normality test
data: older_horses$X0.08mm.A
W = 0.93168, p-value = 0.4975
shapiro.test(older_horses$longest.A) # p = 0.08969 > 0.05: normality is not rejected in the older-horse subset
Shapiro-Wilk normality test
data: older_horses$longest.A
W = 0.85734, p-value = 0.08969
shapiro.test(older_horses$QA.2)
Shapiro-Wilk normality test
data: older_horses$QA.2
W = 0.9309, p-value = 0.49
shapiro.test(older_horses$QB.2)
Shapiro-Wilk normality test
data: older_horses$QB.2
W = 0.96213, p-value = 0.8203
shapiro.test(older_horses$QC.2) # p = 0.2496 > 0.05: normality is not rejected in the older-horse subset
Shapiro-Wilk normality test
data: older_horses$QC.2
W = 0.89959, p-value = 0.2496
shapiro.test(older_horses$QD.2)
Shapiro-Wilk normality test
data: older_horses$QD.2
W = 0.8701, p-value = 0.1233
Younger horses age factor (<= 17)
# Keep horses aged 17 or younger (the counterpart of the Age > 17 group).
# A single dplyr filter() is enough; the earlier subset() result was
# immediately overwritten by it.
library(dplyr)
younger_horses <- data %>% filter(Age <= 17)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(younger_horses)
}
shapiro.test(younger_horses$Age)
Shapiro-Wilk normality test
data: younger_horses$Age
W = 0.93849, p-value = 0.5661
shapiro.test(younger_horses$chew.min.B)
Shapiro-Wilk normality test
data: younger_horses$chew.min.B
W = 0.86687, p-value = 0.1138
shapiro.test(younger_horses$X2mm.B)
Shapiro-Wilk normality test
data: younger_horses$X2mm.B
W = 0.93025, p-value = 0.4837
shapiro.test(younger_horses$X1mm.B)
Shapiro-Wilk normality test
data: younger_horses$X1mm.B
W = 0.95345, p-value = 0.7279
shapiro.test(younger_horses$X0.5mm.B)
Shapiro-Wilk normality test
data: younger_horses$X0.5mm.B
W = 0.95806, p-value = 0.7778
shapiro.test(younger_horses$X0.25mm.B)
Shapiro-Wilk normality test
data: younger_horses$X0.25mm.B
W = 0.90043, p-value = 0.2545
shapiro.test(younger_horses$X0.08mm.B)
Shapiro-Wilk normality test
data: younger_horses$X0.08mm.B
W = 0.89198, p-value = 0.2091
shapiro.test(younger_horses$longest.B)
Shapiro-Wilk normality test
data: younger_horses$longest.B
W = 0.9899, p-value = 0.996
shapiro.test(younger_horses$QA.1) # Not normal: p = 0.004988 is well below 0.05 (it rounds to 0.005, not 0.05)
Shapiro-Wilk normality test
data: younger_horses$QA.1
W = 0.74688, p-value = 0.004988
shapiro.test(younger_horses$QB.1)
Shapiro-Wilk normality test
data: younger_horses$QB.1
W = 0.89386, p-value = 0.2185
shapiro.test(younger_horses$QC.1)
Shapiro-Wilk normality test
data: younger_horses$QC.1
W = 0.94953, p-value = 0.6849
shapiro.test(younger_horses$QD.1) # Not normal
Shapiro-Wilk normality test
data: younger_horses$QD.1
W = 0.81495, p-value = 0.03021
shapiro.test(younger_horses$chew.min.A)
Shapiro-Wilk normality test
data: younger_horses$chew.min.A
W = 0.97794, p-value = 0.9529
shapiro.test(younger_horses$X2mm.A)
Shapiro-Wilk normality test
data: younger_horses$X2mm.A
W = 0.92091, p-value = 0.3998
shapiro.test(younger_horses$X1mm.A)
Shapiro-Wilk normality test
data: younger_horses$X1mm.A
W = 0.88376, p-value = 0.1719
shapiro.test(younger_horses$X0.5mm.A)
Shapiro-Wilk normality test
data: younger_horses$X0.5mm.A
W = 0.90584, p-value = 0.2878
shapiro.test(younger_horses$X0.25mm.A)
Shapiro-Wilk normality test
data: younger_horses$X0.25mm.A
W = 0.87347, p-value = 0.1339
shapiro.test(younger_horses$X0.08mm.A)
Shapiro-Wilk normality test
data: younger_horses$X0.08mm.A
W = 0.83943, p-value = 0.05691
shapiro.test(younger_horses$longest.A) # Not normal
Shapiro-Wilk normality test
data: younger_horses$longest.A
W = 0.73884, p-value = 0.004023
shapiro.test(younger_horses$QA.2)
Shapiro-Wilk normality test
data: younger_horses$QA.2
W = 0.95227, p-value = 0.715
shapiro.test(younger_horses$QB.2)
Shapiro-Wilk normality test
data: younger_horses$QB.2
W = 0.97127, p-value = 0.9054
shapiro.test(younger_horses$QC.2) # Not normal
Shapiro-Wilk normality test
data: younger_horses$QC.2
W = 0.7771, p-value = 0.01115
shapiro.test(younger_horses$QD.2)
Shapiro-Wilk normality test
data: younger_horses$QD.2
W = 0.90644, p-value = 0.2917