Fish Data Understanding:
> Three length measurements
- Length1: Standard Length (SL); in cm
- Length2: Fork Length (FL); in cm
- Length3: Total Length (TL); in cm
> Body width and body height
- Width: Body Width (BW)
- Height: Body Height (BH)
library(readr)
library(dplyr) # data cleaning and manipulation
# Read the fish measurements from the local file "Fish.csv" (not a built-in
# data set) and keep the species label plus the six numeric columns analysed
# below: weight, the three length measurements, body height and body width.
fish_df <- read.csv("Fish.csv", sep = ",") %>%
  select(Species, Weight, Length1, Length2, Length3, Height, Width)
# Column specification for the seven retained columns, written with the
# cols()/col_*() helpers: Species as character, every measurement as double.
# The result is neither assigned nor passed to a reader here.
# NOTE(review): this looks like echoed reader output rather than intentional
# code - confirm before relying on it.
cols(
Species = col_character(),
Weight = col_double(),
Length1 = col_double(),
Length2 = col_double(),
Length3 = col_double(),
Height = col_double(),
Width = col_double()
)
## cols(
## Species = col_character(),
## Weight = col_double(),
## Length1 = col_double(),
## Length2 = col_double(),
## Length3 = col_double(),
## Height = col_double(),
## Width = col_double()
## )
# Build `fish`, a copy of fish_df whose columns carry short names:
# s = species, m = weight, sl / fl / tl = the three lengths,
# bh = body height, bw = body width.
fish <- rename(
  fish_df,
  sl = Length1,
  fl = Length2,
  tl = Length3,
  bw = Width,
  bh = Height,
  m = Weight,
  s = Species
)
# Fit weight as a linear function of body height and body width.
fish_weight <- lm(
  formula = Weight ~ Height + Width,
  data = fish_df
)
# Print coefficients, residual spread and R-squared of the fit.
summary(fish_weight)
##
## Call:
## lm(formula = Weight ~ Height + Width, data = fish_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -249.72 -98.15 -47.17 60.29 890.42
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -433.576 37.075 -11.694 <2e-16 ***
## Height 4.825 5.063 0.953 0.342
## Width 178.523 12.872 13.869 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 166.2 on 156 degrees of freedom
## Multiple R-squared: 0.7871, Adjusted R-squared: 0.7844
## F-statistic: 288.4 on 2 and 156 DF, p-value: < 2.2e-16
# Print the residual of every observation under the weight model.
residuals(fish_weight)
## 1 2 3 4 5 6
## -97.6642210 -105.2818700 -124.5018448 -60.2485469 -112.9962499 -61.7023088
## 7 8 9 10 11 12
## -77.1658451 -74.8226048 -48.7197257 -20.4265437 -71.4512568 4.7249172
## 13 14 15 16 17 18
## 87.4066986 -199.1575225 38.3224508 -37.0621447 118.3172808 133.6350226
## 19 20 21 22 23 24
## 51.6538125 -8.7601933 -58.6944451 82.6756315 36.0681192 -55.5042835
## 25 26 27 28 29 30
## 57.4594128 -0.9872859 -12.5655994 23.2632469 95.5361355 163.3315457
## 31 32 33 34 35 36
## 140.7382933 178.0639270 63.1207187 114.1223350 161.2716355 48.6780490
## 37 38 39 40 41 42
## -26.7234713 -33.8251457 -73.2597423 -114.4892099 -196.0006301 -92.3899099
## 43 44 45 46 47 48
## -64.0315390 -114.6736482 -86.7924145 -122.3672312 -51.6128106 -115.6873236
## 49 50 51 52 53 54
## -118.4443404 -87.0388354 -138.9090551 -117.9232808 -122.0313875 -187.9565886
## 55 56 57 58 59 60
## -178.1737447 -95.1486422 -94.1755321 -138.5456812 -249.7246093 3.2969485
## 61 62 63 64 65 66
## 209.1131220 40.2066197 48.7065356 10.6578842 -7.8598663 -47.1674527
## 67 68 69 70 71 72
## -55.7393916 -50.5894400 -32.0119757 -70.3737480 -86.7167888 -77.1347040
## 73 74 75 76 77 78
## 177.9264478 91.6522965 20.9596638 -6.8806545 -43.6835589 -84.6006981
## 79 80 81 82 83 84
## -71.1056016 -58.1441716 -48.3483551 -0.8073410 -118.5359004 -70.4371082
## 85 86 87 88 89 90
## -123.6196852 -98.1231366 -84.2180965 -102.9269331 -95.1944940 -89.0607135
## 91 92 93 94 95 96
## -196.2656328 -111.6427768 -96.1586003 -98.1828629 -88.9241408 -91.6950590
## 97 98 99 100 101 102
## -41.2494544 -135.0297720 -154.5993867 -75.0800203 -157.8354754 -122.8043657
## 103 104 105 106 107 108
## -223.7032070 -114.8983096 -109.3405861 -166.7304424 -104.6989896 -130.5431600
## 109 110 111 112 113 114
## -135.0045966 -175.1635892 -200.2204361 -173.5592875 -159.2993838 -120.9935998
## 115 116 117 118 119 120
## -37.0710467 -64.0100284 -57.8946326 -41.8246510 -118.7908397 -42.6274941
## 121 122 123 124 125 126
## -12.8421013 56.6368014 16.2494408 245.6557491 76.8605941 149.1955861
## 127 128 129 130 131 132
## -80.7639615 17.3008391 4.0917042 -36.2588035 -77.7792078 -14.3881510
## 133 134 135 136 137 138
## 11.3960384 37.7334866 82.7890075 114.6158738 20.2622255 25.9502067
## 139 140 141 142 143 144
## 93.5777272 206.7856810 238.8121499 385.0565912 890.4171488 840.4171488
## 145 146 147 148 149 150
## 696.0636338 244.8665082 224.4754794 227.2216128 186.3198125 205.3161434
## 151 152 153 154 155 156
## 203.2945459 203.7073197 224.6551262 227.7251042 186.9013835 208.7068576
## 157 158 159
## 210.6015185 70.3738408 103.8494957
# Shapiro-Wilk normality test on the residuals of the weight model.
shapiro.test(x = resid(fish_weight))
##
## Shapiro-Wilk normality test
##
## data: resid(fish_weight)
## W = 0.7968, p-value = 1.371e-13
# The Shapiro-Wilk p-value is far below 0.05: the residuals of the weight
# model are not normally distributed, WATCH OUT!
# Draw the lm diagnostic plots on a single 2 x 2 page.
par(mfrow = c(2, 2))
plot(x = fish_weight)
# par(mfrow = c(1, 1))  # restoring the 1 x 1 layout is left disabled
# Fit body height as a linear function of body width plus a species effect.
fish_height <- lm(
  formula = Height ~ Width + Species,
  data = fish_df
)
# Print coefficients, residual spread and R-squared of the fit.
summary(fish_height)
##
## Call:
## lm(formula = Height ~ Width + Species, data = fish_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.25106 -0.27768 0.01502 0.33825 1.81963
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.1594 0.2512 24.52 <2e-16 ***
## Width 1.6626 0.0418 39.77 <2e-16 ***
## SpeciesParkki -2.5517 0.2391 -10.67 <2e-16 ***
## SpeciesPerch -6.1876 0.1404 -44.05 <2e-16 ***
## SpeciesPike -6.9021 0.1892 -36.48 <2e-16 ***
## SpeciesRoach -5.5461 0.1936 -28.65 <2e-16 ***
## SpeciesSmelt -6.1781 0.2645 -23.36 <2e-16 ***
## SpeciesWhitefish -5.2316 0.2820 -18.55 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6383 on 151 degrees of freedom
## Multiple R-squared: 0.9788, Adjusted R-squared: 0.9778
## F-statistic: 996.3 on 7 and 151 DF, p-value: < 2.2e-16
# Print the residual of every observation under the height model.
residuals(fish_height)
## 1 2 3 4 5
## -1.3229617600 -0.8377902070 -1.5892216796 -0.8370086392 -2.2510587139
## 6 7 8 9 10
## -0.7491728694 -0.7557992972 -1.2868800356 -0.2076824697 -0.1781749363
## 11 12 13 14 15
## -0.3827142891 0.2073644164 0.3376657625 -0.6804097609 0.1981589092
## 16 17 18 19 20
## 0.0014374787 -0.0863709929 0.1373684346 0.9382737991 -1.2081570548
## 21 22 23 24 25
## -0.2906055931 0.9055110169 0.5847405995 -0.8833705848 0.7889743974
## 26 27 28 29 30
## 0.1367119145 0.0773295376 0.6290511924 0.4249075359 1.8196339026
## 31 32 33 34 35
## 1.3928168166 1.4636914903 1.3729356772 1.2581258322 0.8726801699
## 36 37 38 39 40
## -0.2368796038 -0.0063416176 0.1334642908 -0.2747606616 -0.3397214720
## 41 42 43 44 45
## 0.2895654059 -0.0912536926 0.0242303569 -1.0508156112 0.1220702332
## 46 47 48 49 50
## 0.0685213823 0.4085896240 -0.5311285565 0.5447491699 0.2648123463
## 51 52 53 54 55
## -0.0783196906 -0.0207604031 0.7871964367 0.0181985581 -0.0314164949
## 56 57 58 59 60
## 0.3906231108 0.1541268027 0.0666700785 -1.0936163776 -0.0957021268
## 61 62 63 64 65
## 0.5778985123 -0.6282015589 -0.8780520145 -0.6465801896 -0.0670747707
## 66 67 68 69 70
## -0.1894377228 -0.5472978262 0.1182446813 0.8797722921 0.6470833270
## 71 72 73 74 75
## 0.5905872977 0.7209564845 -0.2006892137 0.2324026004 -0.1911553544
## 76 77 78 79 80
## 0.2453967533 -0.2742320133 -0.2717728142 0.0345474877 0.5928472157
## 81 82 83 84 85
## 0.1169255812 0.5015726848 -0.1897153896 0.4467693465 -0.3767539061
## 86 87 88 89 90
## 0.5366984668 0.4730128875 -0.1923384519 0.2776615481 0.0426615481
## 91 92 93 94 95
## -1.0912438095 -0.1409323464 0.7950676536 -0.0537077339 -0.7817574755
## 96 97 98 99 100
## 0.1101486300 1.1314737592 0.0438921709 -0.1643098415 0.3436450605
## 101 102 103 104 105
## -0.4584095696 0.3065340665 -0.1898913413 -0.0118157703 -0.1274157703
## 106 107 108 109 110
## -0.2806007036 0.8588476782 -0.0932501733 -0.2892914705 0.0528880238
## 111 112 113 114 115
## -0.3349295924 -1.4442530488 -0.5026416198 -0.5697136623 0.4478995958
## 116 117 118 119 120
## 0.0150180075 -1.2934597731 1.1844264927 0.2373243987 0.1419527234
## 121 122 123 124 125
## -0.2538274370 0.0022797292 0.1403984944 1.4092430960 -0.1378133086
## 126 127 128 129 130
## 0.2102914439 -0.9043991666 -0.1115063872 0.6985155739 -0.4624749617
## 131 132 133 134 135
## -0.6102795850 0.3478914647 0.4239417571 0.5266492292 0.8362331631
## 136 137 138 139 140
## 0.1542930965 0.0003772821 -0.4372476296 0.4379790498 -0.5152786330
## 141 142 143 144 145
## -0.5911499955 -0.1838788028 0.1278717614 0.1278717614 -0.8813145316
## 146 147 148 149 150
## 0.0157168387 0.0620445787 -0.1622696720 -0.0797196312 -0.0216079913
## 151 152 153 154 155
## -0.1399085080 0.0981190824 0.2941354476 0.3137303280 -0.2079305096
## 156 157 158 159
## 0.3388250384 0.2077708910 -0.5454340178 -0.1734718748
# Shapiro-Wilk normality test on the residuals of the height model.
shapiro.test(x = resid(fish_height))
##
## Shapiro-Wilk normality test
##
## data: resid(fish_height)
## W = 0.98169, p-value = 0.03333
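# p = 0.033 < 0.05, so normality of the residuals is rejected at the 5% level.
# W = 0.98 is much closer to 1 than for the weight model (W = 0.80), so the
# departure is mild, but the residuals are NOT formally normal.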
# Diagnostic plots for the height model on a 2 x 2 page.
par(mfrow = c(2, 2))
plot(x = fish_height)
# Box plots of every numeric measurement, before any outlier removal.
boxplot(fish_df[["Weight"]], main = "Weight")
boxplot(fish_df[, c("Length1", "Length2", "Length3")], main = "Lengths")
boxplot(fish_df[["Height"]], main = "Height")
boxplot(fish_df[["Width"]], main = "Width")
### Removing Outliers
# Keep only the fish whose recorded weight is below 1500.
# NOTE(review): the Roach row with Weight 0 passes this filter and stays in
# the data - confirm whether that record should be kept.
fish_df <- filter(fish_df, Weight < 1500)
# Redraw the box plots on the filtered data, keeping the statistics that
# boxplot() returns in b1..b4.
b1 <- boxplot(fish_df[["Weight"]], main = "Weight")
b2 <- boxplot(fish_df[, c("Length1", "Length2", "Length3")], main = "Lengths")
b3 <- boxplot(fish_df[["Height"]], main = "Height")
b4 <- boxplot(fish_df[["Width"]], main = "Width")
### Correlation Table
# Pairwise correlations of all columns except the first one (Species).
cor(x = fish_df[, -1L])
## Weight Length1 Length2 Length3 Height Width
## Weight 1.0000000 0.8957426 0.8997591 0.9072922 0.8018712 0.9234322
## Length1 0.8957426 1.0000000 0.9994183 0.9906124 0.6763260 0.8891659
## Length2 0.8997591 0.9994183 1.0000000 0.9930493 0.6919540 0.8955835
## Length3 0.9072922 0.9906124 0.9930493 1.0000000 0.7560186 0.8970535
## Height 0.8018712 0.6763260 0.6919540 0.7560186 1.0000000 0.8005423
## Width 0.9234322 0.8891659 0.8955835 0.8970535 0.8005423 1.0000000
# Scatter-plot matrix of all columns except the first one (Species).
pairs(x = fish_df[, -1L])
# The same correlation matrix, rounded to three decimals for readability.
round(cor(x = fish_df[, -1L]), digits = 3)
## Weight Length1 Length2 Length3 Height Width
## Weight 1.000 0.896 0.900 0.907 0.802 0.923
## Length1 0.896 1.000 0.999 0.991 0.676 0.889
## Length2 0.900 0.999 1.000 0.993 0.692 0.896
## Length3 0.907 0.991 0.993 1.000 0.756 0.897
## Height 0.802 0.676 0.692 0.756 1.000 0.801
## Width 0.923 0.889 0.896 0.897 0.801 1.000
# Reproducible random split: every row is independently labelled 1 (with
# probability 0.7) or 2 (with probability 0.3). Rows labelled 1 form the
# training set, rows labelled 2 the testing set.
set.seed(1234)
ind <- sample(2, nrow(fish_df), replace = TRUE, prob = c(0.7, 0.3))
training <- fish_df[ind == 1, ]
testing <- fish_df[ind == 2, ]
# First rows of the training set.
head(training, n = 6L)
## Species Weight Length1 Length2 Length3 Height Width
## 1 Bream 242 23.2 25.4 30.0 11.5200 4.0200
## 2 Bream 290 24.0 26.3 31.2 12.4800 4.3056
## 3 Bream 340 23.9 26.5 31.1 12.3778 4.6961
## 4 Bream 363 26.3 29.0 33.5 12.7300 4.4555
## 6 Bream 450 26.8 29.7 34.7 13.6024 4.9274
## 7 Bream 500 26.8 29.7 34.5 14.1795 5.2785
# First rows of the testing set.
head(testing, n = 6L)
## Species Weight Length1 Length2 Length3 Height Width
## 5 Bream 430 26.5 29 34.0 12.4440 5.1340
## 14 Bream 340 29.5 32 37.3 13.9129 5.0728
## 16 Bream 600 29.4 32 37.2 15.4380 5.5800
## 26 Bream 725 31.8 35 40.9 16.3600 6.0532
## 28 Bream 714 32.7 36 41.5 16.5170 5.8515
## 29 Bream 850 32.8 36 41.6 16.8896 6.1984
# library(devtools)
# devtools::install_github("julianfaraway/faraway")
library(faraway)
# Full model: weight explained by all three lengths, width and height,
# fitted on the training rows only.
model <- lm(
  formula = Weight ~ Length1 + Length2 + Length3 + Width + Height,
  data = training
)
summary(model)
##
## Call:
## lm(formula = Weight ~ Length1 + Length2 + Length3 + Width + Height,
## data = training)
##
## Residuals:
## Min 1Q Median 3Q Max
## -163.85 -65.17 -16.16 38.55 239.78
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -400.297 27.881 -14.357 < 2e-16 ***
## Length1 111.964 34.201 3.274 0.00142 **
## Length2 -61.832 35.832 -1.726 0.08723 .
## Length3 -31.398 15.188 -2.067 0.04106 *
## Width 48.992 18.253 2.684 0.00840 **
## Height 39.543 7.598 5.205 9.1e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 92.07 on 110 degrees of freedom
## Multiple R-squared: 0.9088, Adjusted R-squared: 0.9046
## F-statistic: 219.1 on 5 and 110 DF, p-value: < 2.2e-16
# Variance inflation factor of every predictor in the full model.
faraway::vif(model)
## Length1 Length2 Length3 Width Height
## 1172.99999 1487.94821 327.49198 11.36251 15.29040
# All three lengths have very high VIF values, but Length3 is far lower than
# the other two.
# Fit a second model that uses only the three lengths.
model1 <- lm(
  formula = Weight ~ Length1 + Length2 + Length3,
  data = training
)
summary(model1)
##
## Call:
## lm(formula = Weight ~ Length1 + Length2 + Length3, data = training)
##
## Residuals:
## Min 1Q Median 3Q Max
## -292.62 -65.22 -32.07 68.88 373.70
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -443.84 37.09 -11.966 < 2e-16 ***
## Length1 -43.88 41.71 -1.052 0.29510
## Length2 31.12 45.16 0.689 0.49225
## Length3 34.98 10.42 3.358 0.00107 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 126.4 on 112 degrees of freedom
## Multiple R-squared: 0.825, Adjusted R-squared: 0.8203
## F-statistic: 176 on 3 and 112 DF, p-value: < 2.2e-16
# Variance inflation factor of every predictor in the lengths-only model.
faraway::vif(model1)
## Length1 Length2 Length3
## 926.11827 1254.71693 81.76322
# Length3 again has the lowest VIF of the three lengths, so keep it as the
# only length and combine it with width and height.
model2 <- lm(
  formula = Weight ~ Length3 + Width + Height,
  data = training
)
summary(model2)
##
## Call:
## lm(formula = Weight ~ Length3 + Width + Height, data = training)
##
## Residuals:
## Min 1Q Median 3Q Max
## -165.38 -68.86 -37.45 45.60 289.00
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -432.082 28.577 -15.120 < 2e-16 ***
## Length3 10.316 2.080 4.959 2.54e-06 ***
## Width 78.398 14.192 5.524 2.18e-07 ***
## Height 15.364 3.573 4.301 3.65e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 98.25 on 112 degrees of freedom
## Multiple R-squared: 0.8942, Adjusted R-squared: 0.8914
## F-statistic: 315.6 on 3 and 112 DF, p-value: < 2.2e-16
# Variance inflation factor of every predictor in the reduced model.
faraway::vif(model2)
## Length3 Width Height
## 5.395211 6.030877 2.968547
# In model2 all VIF values are below 10, so Length3, Width and Height are the
# variables carried forward for further analysis.
# Scatter plots of Weight against each predictor kept in model2.
plot(Weight ~ Length3 + Width + Height, data = training)
# abline() draws an intercept-plus-one-slope line: given a fit with four
# coefficients it warns and uses only the first two, which does not describe
# model2 on any of the panels above. Show the quality of the fit instead with
# observed vs. fitted weight and the 1:1 reference line.
plot(
  fitted(model2), model.frame(model2)$Weight,
  xlab = "Fitted weight (model2)", ylab = "Observed weight",
  main = "model2: observed vs. fitted"
)
abline(a = 0, b = 1, col = "blue")
# Residual diagnostics for the selected model (model2, not the full `model`
# that was rejected because of its VIF values).
par(mfrow = c(2, 2))
plot(model2)
# Predict the weight of every testing row with the reduced model.
pred <- predict(model2, newdata = testing)
# First predictions, for comparison with the observed weights.
head(pred, n = 6L)
## 5 14 16 26 28 29
## 512.3641 564.1789 626.3428 715.7772 708.5664 742.5190
# Observed values for the first testing rows.
head(testing, n = 6L)
## Species Weight Length1 Length2 Length3 Height Width
## 5 Bream 430 26.5 29 34.0 12.4440 5.1340
## 14 Bream 340 29.5 32 37.3 13.9129 5.0728
## 16 Bream 600 29.4 32 37.2 15.4380 5.5800
## 26 Bream 725 31.8 35 40.9 16.3600 6.0532
## 28 Bream 714 32.7 36 41.5 16.5170 5.8515
## 29 Bream 850 32.8 36 41.6 16.8896 6.1984
# Last predictions from the reduced model.
tail(pred, n = 6L)
## 140 142 147 149 154 156
## 635.5827 895.5962 -172.0217 -162.2747 -156.2789 -72.5796
# Observed values for the last testing rows.
tail(testing, n = 6L)
## Species Weight Length1 Length2 Length3 Height Width
## 140 Pike 770.0 44.8 48.0 51.2 7.6800 5.3760
## 142 Pike 1250.0 52.0 56.0 59.7 10.6863 6.9849
## 147 Smelt 9.8 10.7 11.2 12.4 2.0832 1.2772
## 149 Smelt 10.0 11.3 11.8 13.1 2.2139 1.2838
## 154 Smelt 12.2 12.1 13.0 13.8 2.2770 1.2558
## 156 Smelt 19.9 13.8 15.0 16.2 2.9322 1.8792
We can see that the predicted and the actual values differ a lot, and in some cases we get negative weights, which is impossible. All the negative values occur for the Smelt species: the fitted intercept is about -432, so the predictions for very small fish fall below zero. For the other species the difference between the predicted and the actual values is smaller. So, after eliminating the Smelt species, we can predict the weight much better, at least approximately.
# Assign each fish species a color (7 species, 7 colors).
# List the distinct species names so that each one gets an entry in COLORS;
# unique(fish_df) would instead return every distinct row of the data frame.
# NOTE: the `##` listing that follows in this document was printed by that
# earlier whole-data-frame call and needs to be regenerated.
unique(fish_df$Species)
## Species Weight Length1 Length2 Length3 Height Width
## 1 Bream 242.0 23.2 25.4 30.0 11.5200 4.0200
## 2 Bream 290.0 24.0 26.3 31.2 12.4800 4.3056
## 3 Bream 340.0 23.9 26.5 31.1 12.3778 4.6961
## 4 Bream 363.0 26.3 29.0 33.5 12.7300 4.4555
## 5 Bream 430.0 26.5 29.0 34.0 12.4440 5.1340
## 6 Bream 450.0 26.8 29.7 34.7 13.6024 4.9274
## 7 Bream 500.0 26.8 29.7 34.5 14.1795 5.2785
## 8 Bream 390.0 27.6 30.0 35.0 12.6700 4.6900
## 9 Bream 450.0 27.6 30.0 35.1 14.0049 4.8438
## 10 Bream 500.0 28.5 30.7 36.2 14.2266 4.9594
## 11 Bream 475.0 28.4 31.0 36.2 14.2628 5.1042
## 12 Bream 500.0 28.7 31.0 36.2 14.3714 4.8146
## 13 Bream 500.0 29.1 31.5 36.4 13.7592 4.3680
## 14 Bream 340.0 29.5 32.0 37.3 13.9129 5.0728
## 15 Bream 600.0 29.4 32.0 37.2 14.9544 5.1708
## 16 Bream 600.0 29.4 32.0 37.2 15.4380 5.5800
## 17 Bream 700.0 30.4 33.0 38.3 14.8604 5.2854
## 18 Bream 700.0 30.4 33.0 38.5 14.9380 5.1975
## 19 Bream 610.0 30.9 33.5 38.6 15.6330 5.1338
## 20 Bream 650.0 31.0 33.5 38.7 14.4738 5.7276
## 21 Bream 575.0 31.3 34.0 39.5 15.1285 5.5695
## 22 Bream 685.0 31.4 34.0 39.2 15.9936 5.3704
## 23 Bream 620.0 31.5 34.5 39.7 15.5227 5.2801
## 24 Bream 680.0 31.8 35.0 40.6 15.4686 6.1306
## 25 Bream 700.0 31.9 35.0 40.5 16.2405 5.5890
## 26 Bream 725.0 31.8 35.0 40.9 16.3600 6.0532
## 27 Bream 720.0 32.0 35.0 40.6 16.3618 6.0900
## 28 Bream 714.0 32.7 36.0 41.5 16.5170 5.8515
## 29 Bream 850.0 32.8 36.0 41.6 16.8896 6.1984
## 30 Bream 1000.0 33.5 37.0 42.6 18.9570 6.6030
## 31 Bream 920.0 35.0 38.5 44.1 18.0369 6.3063
## 32 Bream 955.0 35.0 38.5 44.0 18.0840 6.2920
## 33 Bream 925.0 36.2 39.5 45.3 18.7542 6.7497
## 34 Bream 975.0 37.4 41.0 45.9 18.6354 6.7473
## 35 Bream 950.0 38.0 41.0 46.5 17.6235 6.3705
## 36 Roach 40.0 12.9 14.1 16.2 4.1472 2.2680
## 37 Roach 69.0 16.5 18.2 20.3 5.2983 2.8217
## 38 Roach 78.0 17.5 18.8 21.2 5.5756 2.9044
## 39 Roach 87.0 18.2 19.8 22.2 5.6166 3.1746
## 40 Roach 120.0 18.6 20.0 22.2 6.2160 3.5742
## 41 Roach 0.0 19.0 20.5 22.8 6.4752 3.3516
## 42 Roach 110.0 19.1 20.8 23.1 6.1677 3.3957
## 43 Roach 120.0 19.4 21.0 23.7 6.1146 3.2943
## 44 Roach 150.0 20.4 22.0 24.7 5.8045 3.7544
## 45 Roach 145.0 20.5 22.0 24.3 6.6339 3.5478
## 46 Roach 160.0 20.5 22.5 25.3 7.0334 3.8203
## 47 Roach 140.0 21.0 22.5 25.0 6.5500 3.3250
## 48 Roach 160.0 21.1 22.5 25.0 6.4000 3.8000
## 49 Roach 169.0 22.0 24.0 27.2 7.5344 3.8352
## 50 Roach 161.0 22.0 23.4 26.7 6.9153 3.6312
## 51 Roach 200.0 22.1 23.5 26.8 7.3968 4.1272
## 52 Roach 180.0 23.6 25.2 27.9 7.0866 3.9060
## 53 Roach 290.0 24.0 26.0 29.2 8.8768 4.4968
## 54 Roach 272.0 25.0 27.0 30.6 8.5680 4.7736
## 55 Roach 390.0 29.5 31.7 35.0 9.4850 5.3550
## 56 Whitefish 270.0 23.6 26.0 28.7 8.3804 4.2476
## 57 Whitefish 270.0 24.1 26.5 29.3 8.1454 4.2485
## 58 Whitefish 306.0 25.6 28.0 30.8 8.7780 4.6816
## 59 Whitefish 540.0 28.5 31.0 34.0 10.7440 6.5620
## 60 Whitefish 800.0 33.7 36.4 39.6 11.7612 6.5736
## 61 Whitefish 1000.0 37.3 40.0 43.5 12.3540 6.5250
## 62 Parkki 55.0 13.5 14.7 16.5 6.8475 2.3265
## 63 Parkki 60.0 14.3 15.5 17.4 6.5772 2.3142
## 64 Parkki 90.0 16.3 17.7 19.8 7.4052 2.6730
## 65 Parkki 120.0 17.5 19.0 21.3 8.3922 2.9181
## 66 Parkki 150.0 18.4 20.0 22.4 8.8928 3.2928
## 67 Parkki 140.0 19.0 20.7 23.2 8.5376 3.2944
## 68 Parkki 170.0 19.0 20.7 23.2 9.3960 3.4104
## 69 Parkki 145.0 19.8 21.5 24.1 9.7364 3.1571
## 70 Parkki 200.0 21.2 23.0 25.8 10.3458 3.6636
## 71 Parkki 273.0 23.0 25.0 28.0 11.0880 4.1440
## 72 Parkki 300.0 24.0 26.0 29.0 11.3680 4.2340
## 73 Perch 5.9 7.5 8.4 8.8 2.1120 1.4080
## 74 Perch 32.0 12.5 13.7 14.7 3.5280 1.9992
## 75 Perch 40.0 13.8 15.0 16.0 3.8240 2.4320
## 76 Perch 51.5 15.0 16.2 17.2 4.5924 2.6316
## 77 Perch 70.0 15.7 17.4 18.5 4.5880 2.9415
## 78 Perch 100.0 16.2 18.0 19.2 5.2224 3.3216
## 79 Perch 78.0 16.8 18.7 19.4 5.1992 3.1234
## 80 Perch 80.0 17.2 19.0 20.2 5.6358 3.0502
## 81 Perch 85.0 17.8 19.6 20.8 5.1376 3.0368
## 82 Perch 85.0 18.2 20.0 21.0 5.0820 2.7720
## 83 Perch 110.0 19.0 21.0 22.5 5.6925 3.5550
## 84 Perch 115.0 19.0 21.0 22.5 5.9175 3.3075
## 85 Perch 125.0 19.0 21.0 22.5 5.6925 3.6675
## 86 Perch 130.0 19.3 21.3 22.8 6.3840 3.5340
## 87 Perch 120.0 20.0 22.0 23.5 6.1100 3.4075
## 88 Perch 120.0 20.0 22.0 23.5 5.6400 3.5250
## 89 Perch 130.0 20.0 22.0 23.5 6.1100 3.5250
## 90 Perch 135.0 20.0 22.0 23.5 5.8750 3.5250
## 91 Perch 110.0 20.0 22.0 23.5 5.5225 3.9950
## 92 Perch 130.0 20.5 22.5 24.0 5.8560 3.6240
## 93 Perch 150.0 20.5 22.5 24.0 6.7920 3.6240
## 94 Perch 145.0 20.7 22.7 24.2 5.9532 3.6300
## 95 Perch 150.0 21.0 23.0 24.5 5.2185 3.6260
## 96 Perch 170.0 21.5 23.5 25.0 6.2750 3.7250
## 97 Perch 225.0 22.0 24.0 25.5 7.2930 3.7230
## 98 Perch 145.0 22.0 24.0 25.5 6.3750 3.8250
## 99 Perch 188.0 22.6 24.6 26.2 6.7334 4.1658
## 100 Perch 180.0 23.0 25.0 26.5 6.4395 3.6835
## 101 Perch 197.0 23.5 25.6 27.0 6.5610 4.2390
## 102 Perch 218.0 25.0 26.5 28.0 7.1680 4.1440
## 103 Perch 300.0 25.2 27.3 28.7 8.3230 5.1373
## 104 Perch 260.0 25.4 27.5 28.9 7.1672 4.3350
## 105 Perch 265.0 25.4 27.5 28.9 7.0516 4.3350
## 106 Perch 250.0 25.4 27.5 28.9 7.2828 4.5662
## 107 Perch 250.0 25.9 28.0 29.4 7.8204 4.2042
## 108 Perch 300.0 26.9 28.7 30.1 7.5852 4.6354
## 109 Perch 320.0 27.8 30.0 31.6 7.6156 4.7716
## 110 Perch 514.0 30.5 32.8 34.0 10.0300 6.0180
## 111 Perch 556.0 32.0 34.5 36.5 10.2565 6.3875
## 112 Perch 840.0 32.5 35.0 37.3 11.4884 7.7957
## 113 Perch 685.0 34.0 36.5 39.0 10.8810 6.8640
## 114 Perch 700.0 34.0 36.0 38.3 10.6091 6.7408
## 115 Perch 700.0 34.5 37.0 39.4 10.8350 6.2646
## 116 Perch 690.0 34.6 37.0 39.3 10.5717 6.3666
## 117 Perch 900.0 36.5 39.0 41.4 11.1366 7.4934
## 118 Perch 650.0 36.5 39.0 41.4 11.1366 6.0030
## 119 Perch 820.0 36.6 39.0 41.3 12.4313 7.3514
## 120 Perch 850.0 36.9 40.0 42.3 11.9286 7.1064
## 121 Perch 900.0 37.0 40.0 42.5 11.7300 7.2250
## 122 Perch 1015.0 37.0 40.0 42.4 12.3808 7.4624
## 123 Perch 820.0 37.1 40.0 42.5 11.1350 6.6300
## 124 Perch 1100.0 39.0 42.0 44.6 12.8002 6.8684
## 125 Perch 1000.0 39.8 43.0 45.2 11.9328 7.2772
## 126 Perch 1100.0 40.1 43.0 45.5 12.5125 7.4165
## 127 Perch 1000.0 40.2 43.5 46.0 12.6040 8.1420
## 128 Perch 1000.0 41.1 44.0 46.6 12.4888 7.5958
## 129 Pike 200.0 30.0 32.3 34.8 5.5680 3.3756
## 130 Pike 300.0 31.7 34.0 37.8 5.7078 4.1580
## 131 Pike 300.0 32.7 35.0 38.8 5.9364 4.3844
## 132 Pike 300.0 34.8 37.3 39.8 6.2884 4.0198
## 133 Pike 430.0 35.5 38.0 40.5 7.2900 4.5765
## 134 Pike 345.0 36.0 38.5 41.0 6.3960 3.9770
## 135 Pike 456.0 40.0 42.5 45.5 7.2800 4.3225
## 136 Pike 510.0 40.0 42.5 45.5 6.8250 4.4590
## 137 Pike 540.0 40.1 43.0 45.8 7.7860 5.1296
## 138 Pike 500.0 42.0 45.0 48.0 6.9600 4.8960
## 139 Pike 567.0 43.2 46.0 48.7 7.7920 4.8700
## 140 Pike 770.0 44.8 48.0 51.2 7.6800 5.3760
## 141 Pike 950.0 48.3 51.7 55.1 8.9262 6.1712
## 142 Pike 1250.0 52.0 56.0 59.7 10.6863 6.9849
## 143 Smelt 6.7 9.3 9.8 10.8 1.7388 1.0476
## 144 Smelt 7.5 10.0 10.5 11.6 1.9720 1.1600
## 145 Smelt 7.0 10.1 10.6 11.6 1.7284 1.1484
## 146 Smelt 9.7 10.4 11.0 12.0 2.1960 1.3800
## 147 Smelt 9.8 10.7 11.2 12.4 2.0832 1.2772
## 148 Smelt 8.7 10.8 11.3 12.6 1.9782 1.2852
## 149 Smelt 10.0 11.3 11.8 13.1 2.2139 1.2838
## 150 Smelt 9.9 11.3 11.8 13.1 2.2139 1.1659
## 151 Smelt 9.8 11.4 12.0 13.2 2.2044 1.1484
## 152 Smelt 12.2 11.5 12.2 13.4 2.0904 1.3936
## 153 Smelt 13.4 11.7 12.4 13.5 2.4300 1.2690
## 154 Smelt 12.2 12.1 13.0 13.8 2.2770 1.2558
## 155 Smelt 19.7 13.2 14.3 15.2 2.8728 2.0672
## 156 Smelt 19.9 13.8 15.0 16.2 2.9322 1.8792
# Fill colour used for each species in the box plots below.
COLORS <- c(
  "Bream" = "red",
  "Roach" = "coral",
  "Whitefish" = "gold",
  "Parkki" = "darkolivegreen3",
  "Perch" = "slategray1",
  "Pike" = "purple",
  "Smelt" = "slateblue1"
)
# Using the fct_reorder library(forcats) to order the boxplots
# Using ggpubr to add the statistical testing at the top of each chart
library(forcats)
library(ggpubr)
# Draw one box plot per species for a single measurement column.
#
# data     data frame with a `Species` column and the column named in `measure`
# measure  name (string) of the numeric column shown on the y axis
# title    plot title
# y_label  y-axis label
# palette  named vector of fill colours, one entry per species
#
# Returns the ggplot object. Species are ordered along the x axis by
# descending median of `measure` (median is fct_reorder's default summary).
# The legend is hidden because the x axis already names the species, and
# stat_compare_means() adds the Kruskal-Wallis test result to the panel.
species_boxplot <- function(data, measure, title, y_label, palette = COLORS) {
  ggplot(
    data,
    aes(
      x = fct_reorder(Species, .data[[measure]], .desc = TRUE),
      y = .data[[measure]]
    )
  ) +
    geom_boxplot(aes(fill = Species)) +
    theme(legend.position = "none") +
    ggtitle(title) +
    ylab(y_label) + xlab("Species") +
    scale_fill_manual(values = palette) +
    stat_compare_means(method = "kruskal.test")
}

# Titles and axis labels follow the variable definitions given at the top of
# this document: Length1 = Standard Length, Length3 = Total Length,
# Width = Body Width, Height = Body Height.

### BOXPLOT of SPECIES and WEIGHT
species_boxplot(fish_df, "Weight", "Species and Weight", "Weight (g)")
### BOXPLOT of SPECIES and STANDARD LENGTH (Length1)
species_boxplot(
  fish_df, "Length1", "Species and Standard Length", "Standard Length (cm)"
)
### BOXPLOT of SPECIES and TOTAL LENGTH (Length3)
species_boxplot(
  fish_df, "Length3", "Species and Total Length", "Total Length (cm)"
)
### BOXPLOT of SPECIES and HEIGHT
species_boxplot(fish_df, "Height", "Species and Height", "Height (cm)")
### BOXPLOT of SPECIES and BODY WIDTH
species_boxplot(fish_df, "Width", "Species and Body Width", "Width (cm)")