Tutorial : How to use Linear Model on Fish Body Measurements

Overview : Data Structure

Fish Data Understanding :
> Three length measurements
- Length1: Standard Length (SL); in cm
- Length2: Fork Length (FL); in cm
- Length3: Total Length (TL); in cm
> Body width and body height
- Width: Body Width (BW)
- Height: Body Height (BH)

Deploy linear model

library(readr) 
library(dplyr) # data cleaning and manipulation

# Read the fish measurements from the local file "Fish.csv" (a CSV on disk,
# not a built-in data set) and keep the columns used in this tutorial:
# species, weight, the three length measurements, height, and width.
fish_df <- read.csv("Fish.csv") %>%
  select(Species, Weight, Length1, Length2, Length3, Height, Width)

# Column specification listing the expected type of every column.
# It is only built and printed here; it is not passed to read.csv() above.
cols(
  Species = col_character(),
  Weight = col_double(),
  Length1 = col_double(),
  Length2 = col_double(),
  Length3 = col_double(),
  Height = col_double(),
  Width = col_double()
)
## cols(
##   Species = col_character(),
##   Weight = col_double(),
##   Length1 = col_double(),
##   Length2 = col_double(),
##   Length3 = col_double(),
##   Height = col_double(),
##   Width = col_double()
## )
# Copy of fish_df with abbreviated column names; values and column order
# are untouched.
#   s  = Species   m  = Weight
#   sl = Length1   fl = Length2   tl = Length3
#   bh = Height    bw = Width
fish <- rename(
  fish_df,
  s = Species,
  m = Weight,
  sl = Length1,
  fl = Length2,
  tl = Length3,
  bh = Height,
  bw = Width
)

What are we working with?

  • Species (fct): The name of the fish species
  • Weight (num): Weight of the fish (g)
  • Length1 (num): Vertical length (cm)
  • Length2 (num): Diagonal length (cm)
  • Length3 (num): Cross length (cm)
  • Height (num): Height (cm)
  • Width (num): Diagonal width (cm)

> Weight is the dependent variable.
> All variables except Species are continuous and expected to be normally distributed.
> Because our dependent variable is continuous, this makes it a Multiple Linear Regression problem.
> The goal of multiple linear regression is to predict the weight based on the variables in the model.

Weight as response

# Fit a linear model with Weight as the response and Height and Width as
# predictors on the full fish_df, then print the coefficient table and
# the overall fit statistics.
fish_weight <- lm(Weight ~ Height + Width, data = fish_df) # Run a regression analysis
summary(fish_weight)
## 
## Call:
## lm(formula = Weight ~ Height + Width, data = fish_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -249.72  -98.15  -47.17   60.29  890.42 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -433.576     37.075 -11.694   <2e-16 ***
## Height         4.825      5.063   0.953    0.342    
## Width        178.523     12.872  13.869   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 166.2 on 156 degrees of freedom
## Multiple R-squared:  0.7871, Adjusted R-squared:  0.7844 
## F-statistic: 288.4 on 2 and 156 DF,  p-value: < 2.2e-16
# Print the residual of every observation used to fit fish_weight
residuals(fish_weight)
##            1            2            3            4            5            6 
##  -97.6642210 -105.2818700 -124.5018448  -60.2485469 -112.9962499  -61.7023088 
##            7            8            9           10           11           12 
##  -77.1658451  -74.8226048  -48.7197257  -20.4265437  -71.4512568    4.7249172 
##           13           14           15           16           17           18 
##   87.4066986 -199.1575225   38.3224508  -37.0621447  118.3172808  133.6350226 
##           19           20           21           22           23           24 
##   51.6538125   -8.7601933  -58.6944451   82.6756315   36.0681192  -55.5042835 
##           25           26           27           28           29           30 
##   57.4594128   -0.9872859  -12.5655994   23.2632469   95.5361355  163.3315457 
##           31           32           33           34           35           36 
##  140.7382933  178.0639270   63.1207187  114.1223350  161.2716355   48.6780490 
##           37           38           39           40           41           42 
##  -26.7234713  -33.8251457  -73.2597423 -114.4892099 -196.0006301  -92.3899099 
##           43           44           45           46           47           48 
##  -64.0315390 -114.6736482  -86.7924145 -122.3672312  -51.6128106 -115.6873236 
##           49           50           51           52           53           54 
## -118.4443404  -87.0388354 -138.9090551 -117.9232808 -122.0313875 -187.9565886 
##           55           56           57           58           59           60 
## -178.1737447  -95.1486422  -94.1755321 -138.5456812 -249.7246093    3.2969485 
##           61           62           63           64           65           66 
##  209.1131220   40.2066197   48.7065356   10.6578842   -7.8598663  -47.1674527 
##           67           68           69           70           71           72 
##  -55.7393916  -50.5894400  -32.0119757  -70.3737480  -86.7167888  -77.1347040 
##           73           74           75           76           77           78 
##  177.9264478   91.6522965   20.9596638   -6.8806545  -43.6835589  -84.6006981 
##           79           80           81           82           83           84 
##  -71.1056016  -58.1441716  -48.3483551   -0.8073410 -118.5359004  -70.4371082 
##           85           86           87           88           89           90 
## -123.6196852  -98.1231366  -84.2180965 -102.9269331  -95.1944940  -89.0607135 
##           91           92           93           94           95           96 
## -196.2656328 -111.6427768  -96.1586003  -98.1828629  -88.9241408  -91.6950590 
##           97           98           99          100          101          102 
##  -41.2494544 -135.0297720 -154.5993867  -75.0800203 -157.8354754 -122.8043657 
##          103          104          105          106          107          108 
## -223.7032070 -114.8983096 -109.3405861 -166.7304424 -104.6989896 -130.5431600 
##          109          110          111          112          113          114 
## -135.0045966 -175.1635892 -200.2204361 -173.5592875 -159.2993838 -120.9935998 
##          115          116          117          118          119          120 
##  -37.0710467  -64.0100284  -57.8946326  -41.8246510 -118.7908397  -42.6274941 
##          121          122          123          124          125          126 
##  -12.8421013   56.6368014   16.2494408  245.6557491   76.8605941  149.1955861 
##          127          128          129          130          131          132 
##  -80.7639615   17.3008391    4.0917042  -36.2588035  -77.7792078  -14.3881510 
##          133          134          135          136          137          138 
##   11.3960384   37.7334866   82.7890075  114.6158738   20.2622255   25.9502067 
##          139          140          141          142          143          144 
##   93.5777272  206.7856810  238.8121499  385.0565912  890.4171488  840.4171488 
##          145          146          147          148          149          150 
##  696.0636338  244.8665082  224.4754794  227.2216128  186.3198125  205.3161434 
##          151          152          153          154          155          156 
##  203.2945459  203.7073197  224.6551262  227.7251042  186.9013835  208.7068576 
##          157          158          159 
##  210.6015185   70.3738408  103.8494957
# Shapiro-Wilk normality test applied to the residuals of fish_weight;
# a small p-value is evidence against normally distributed residuals.
shapiro.test(resid(fish_weight)) # normality test in residual model
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(fish_weight)
## W = 0.7968, p-value = 1.371e-13
                                 # residuals are not normally distributed (p < 0.05), WATCH OUT!
# Draw the lm diagnostic plots for fish_weight on a single 2 x 2 page.
# par() returns the previous settings, which are restored afterwards so
# that later plots are not squeezed into the 2 x 2 grid.
old_par <- par(mfrow = c(2, 2))
plot(fish_weight)
par(old_par) # back to the previous panel layout

Height as response

# Fit a linear model with Height as the response and Width plus Species
# as predictors on the full fish_df, then print the coefficient table and
# the overall fit statistics.
fish_height <- lm(Height ~ Width + Species, data = fish_df)
summary(fish_height)
## 
## Call:
## lm(formula = Height ~ Width + Species, data = fish_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.25106 -0.27768  0.01502  0.33825  1.81963 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        6.1594     0.2512   24.52   <2e-16 ***
## Width              1.6626     0.0418   39.77   <2e-16 ***
## SpeciesParkki     -2.5517     0.2391  -10.67   <2e-16 ***
## SpeciesPerch      -6.1876     0.1404  -44.05   <2e-16 ***
## SpeciesPike       -6.9021     0.1892  -36.48   <2e-16 ***
## SpeciesRoach      -5.5461     0.1936  -28.65   <2e-16 ***
## SpeciesSmelt      -6.1781     0.2645  -23.36   <2e-16 ***
## SpeciesWhitefish  -5.2316     0.2820  -18.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6383 on 151 degrees of freedom
## Multiple R-squared:  0.9788, Adjusted R-squared:  0.9778 
## F-statistic: 996.3 on 7 and 151 DF,  p-value: < 2.2e-16
# Print the residual of every observation used to fit fish_height
residuals(fish_height)
##             1             2             3             4             5 
## -1.3229617600 -0.8377902070 -1.5892216796 -0.8370086392 -2.2510587139 
##             6             7             8             9            10 
## -0.7491728694 -0.7557992972 -1.2868800356 -0.2076824697 -0.1781749363 
##            11            12            13            14            15 
## -0.3827142891  0.2073644164  0.3376657625 -0.6804097609  0.1981589092 
##            16            17            18            19            20 
##  0.0014374787 -0.0863709929  0.1373684346  0.9382737991 -1.2081570548 
##            21            22            23            24            25 
## -0.2906055931  0.9055110169  0.5847405995 -0.8833705848  0.7889743974 
##            26            27            28            29            30 
##  0.1367119145  0.0773295376  0.6290511924  0.4249075359  1.8196339026 
##            31            32            33            34            35 
##  1.3928168166  1.4636914903  1.3729356772  1.2581258322  0.8726801699 
##            36            37            38            39            40 
## -0.2368796038 -0.0063416176  0.1334642908 -0.2747606616 -0.3397214720 
##            41            42            43            44            45 
##  0.2895654059 -0.0912536926  0.0242303569 -1.0508156112  0.1220702332 
##            46            47            48            49            50 
##  0.0685213823  0.4085896240 -0.5311285565  0.5447491699  0.2648123463 
##            51            52            53            54            55 
## -0.0783196906 -0.0207604031  0.7871964367  0.0181985581 -0.0314164949 
##            56            57            58            59            60 
##  0.3906231108  0.1541268027  0.0666700785 -1.0936163776 -0.0957021268 
##            61            62            63            64            65 
##  0.5778985123 -0.6282015589 -0.8780520145 -0.6465801896 -0.0670747707 
##            66            67            68            69            70 
## -0.1894377228 -0.5472978262  0.1182446813  0.8797722921  0.6470833270 
##            71            72            73            74            75 
##  0.5905872977  0.7209564845 -0.2006892137  0.2324026004 -0.1911553544 
##            76            77            78            79            80 
##  0.2453967533 -0.2742320133 -0.2717728142  0.0345474877  0.5928472157 
##            81            82            83            84            85 
##  0.1169255812  0.5015726848 -0.1897153896  0.4467693465 -0.3767539061 
##            86            87            88            89            90 
##  0.5366984668  0.4730128875 -0.1923384519  0.2776615481  0.0426615481 
##            91            92            93            94            95 
## -1.0912438095 -0.1409323464  0.7950676536 -0.0537077339 -0.7817574755 
##            96            97            98            99           100 
##  0.1101486300  1.1314737592  0.0438921709 -0.1643098415  0.3436450605 
##           101           102           103           104           105 
## -0.4584095696  0.3065340665 -0.1898913413 -0.0118157703 -0.1274157703 
##           106           107           108           109           110 
## -0.2806007036  0.8588476782 -0.0932501733 -0.2892914705  0.0528880238 
##           111           112           113           114           115 
## -0.3349295924 -1.4442530488 -0.5026416198 -0.5697136623  0.4478995958 
##           116           117           118           119           120 
##  0.0150180075 -1.2934597731  1.1844264927  0.2373243987  0.1419527234 
##           121           122           123           124           125 
## -0.2538274370  0.0022797292  0.1403984944  1.4092430960 -0.1378133086 
##           126           127           128           129           130 
##  0.2102914439 -0.9043991666 -0.1115063872  0.6985155739 -0.4624749617 
##           131           132           133           134           135 
## -0.6102795850  0.3478914647  0.4239417571  0.5266492292  0.8362331631 
##           136           137           138           139           140 
##  0.1542930965  0.0003772821 -0.4372476296  0.4379790498 -0.5152786330 
##           141           142           143           144           145 
## -0.5911499955 -0.1838788028  0.1278717614  0.1278717614 -0.8813145316 
##           146           147           148           149           150 
##  0.0157168387  0.0620445787 -0.1622696720 -0.0797196312 -0.0216079913 
##           151           152           153           154           155 
## -0.1399085080  0.0981190824  0.2941354476  0.3137303280 -0.2079305096 
##           156           157           158           159 
##  0.3388250384  0.2077708910 -0.5454340178 -0.1734718748
# Shapiro-Wilk normality test applied to the residuals of fish_height;
# a small p-value is evidence against normally distributed residuals.
shapiro.test(resid(fish_height)) # normality test in residual model
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(fish_height)
## W = 0.98169, p-value = 0.03333
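                                 # p = 0.033 < 0.05: normality of the residuals is still rejected at the 5% level,
                                 # although W = 0.98 is much closer to normal than the Weight model (W = 0.80)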
# Draw the lm diagnostic plots for fish_height on a single 2 x 2 page.
# par() returns the previous settings, which are restored afterwards so
# that the boxplots that follow get a full page each.
old_par <- par(mfrow = c(2, 2))
plot(fish_height)
par(old_par) # back to the previous panel layout

Outlier Detection

# One boxplot per measurement group to look for extreme values.
# Columns are addressed by name rather than by position.
boxplot(fish_df[["Weight"]], main = "Weight")

boxplot(fish_df[, c("Length1", "Length2", "Length3")], main = "Lengths")

boxplot(fish_df[["Height"]], main = "Height")

boxplot(fish_df[["Width"]], main = "Width")

### Removing Outliers
# Keep only the fish whose Weight is below 1500, overwriting fish_df.
# NOTE(review): the data also contains a Roach record with Weight == 0,
# which this filter keeps -- confirm whether it should be dropped too.
fish_df <- fish_df %>%
  filter(Weight < 1500)

# Redraw the boxplots on the filtered data. boxplot() also returns its
# summary statistics, which are kept in b1..b4.
b1 <- boxplot(fish_df$Weight, main = "Weight")

b2 <- boxplot(fish_df[, c(3, 4, 5)], main = "Lengths")

b3 <- boxplot(fish_df$Height, main = "Height")

b4 <- boxplot(fish_df$Width, main = "Width")

### Correlation Table
# Pairwise correlations between all columns except Species
cor(select(fish_df, -Species))
##            Weight   Length1   Length2   Length3    Height     Width
## Weight  1.0000000 0.8957426 0.8997591 0.9072922 0.8018712 0.9234322
## Length1 0.8957426 1.0000000 0.9994183 0.9906124 0.6763260 0.8891659
## Length2 0.8997591 0.9994183 1.0000000 0.9930493 0.6919540 0.8955835
## Length3 0.9072922 0.9906124 0.9930493 1.0000000 0.7560186 0.8970535
## Height  0.8018712 0.6763260 0.6919540 0.7560186 1.0000000 0.8005423
## Width   0.9234322 0.8891659 0.8955835 0.8970535 0.8005423 1.0000000
# Scatter-plot matrix of all columns except Species
pairs(select(fish_df, -Species))

# Same correlation table, rounded to three decimals for readability
round(cor(select(fish_df, -Species)), digits = 3)
##         Weight Length1 Length2 Length3 Height Width
## Weight   1.000   0.896   0.900   0.907  0.802 0.923
## Length1  0.896   1.000   0.999   0.991  0.676 0.889
## Length2  0.900   0.999   1.000   0.993  0.692 0.896
## Length3  0.907   0.991   0.993   1.000  0.756 0.897
## Height   0.802   0.676   0.692   0.756  1.000 0.801
## Width    0.923   0.889   0.896   0.897  0.801 1.000

Data Partition (one of the cross validation methods)

Splitting the data into training and testing sets

# Fix the random seed so the split below is reproducible.
set.seed(1234)

# Label every row 1 or 2 independently (probabilities 0.7 / 0.3), so the
# resulting split sizes are only approximately 70% / 30%.
ind <- sample(2, nrow(fish_df), replace = TRUE, prob = c(0.7, 0.3))
training <- fish_df[ind == 1, ] # rows labelled 1
testing <- fish_df[ind == 2, ]  # rows labelled 2
head(training)
##   Species Weight Length1 Length2 Length3  Height  Width
## 1   Bream    242    23.2    25.4    30.0 11.5200 4.0200
## 2   Bream    290    24.0    26.3    31.2 12.4800 4.3056
## 3   Bream    340    23.9    26.5    31.1 12.3778 4.6961
## 4   Bream    363    26.3    29.0    33.5 12.7300 4.4555
## 6   Bream    450    26.8    29.7    34.7 13.6024 4.9274
## 7   Bream    500    26.8    29.7    34.5 14.1795 5.2785
# First six rows of the testing split
head(testing, n = 6L)
##    Species Weight Length1 Length2 Length3  Height  Width
## 5    Bream    430    26.5      29    34.0 12.4440 5.1340
## 14   Bream    340    29.5      32    37.3 13.9129 5.0728
## 16   Bream    600    29.4      32    37.2 15.4380 5.5800
## 26   Bream    725    31.8      35    40.9 16.3600 6.0532
## 28   Bream    714    32.7      36    41.5 16.5170 5.8515
## 29   Bream    850    32.8      36    41.6 16.8896 6.1984
# library(devtools)
# devtools::install_github("julianfaraway/faraway")
library(faraway)

# Full model: Weight regressed on all three lengths plus Width and Height,
# fitted on the training split only.
model <- lm(Weight ~ Length1 + Length2 + Length3 + Width + Height, data = training)
summary(model)
## 
## Call:
## lm(formula = Weight ~ Length1 + Length2 + Length3 + Width + Height, 
##     data = training)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -163.85  -65.17  -16.16   38.55  239.78 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -400.297     27.881 -14.357  < 2e-16 ***
## Length1      111.964     34.201   3.274  0.00142 ** 
## Length2      -61.832     35.832  -1.726  0.08723 .  
## Length3      -31.398     15.188  -2.067  0.04106 *  
## Width         48.992     18.253   2.684  0.00840 ** 
## Height        39.543      7.598   5.205  9.1e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 92.07 on 110 degrees of freedom
## Multiple R-squared:  0.9088, Adjusted R-squared:  0.9046 
## F-statistic: 219.1 on 5 and 110 DF,  p-value: < 2.2e-16
# Variance inflation factor of every predictor in the full model
faraway::vif(model)
##    Length1    Length2    Length3      Width     Height 
## 1172.99999 1487.94821  327.49198   11.36251   15.29040
# All the lengths have high VIF values, but Length3 has a much lower value than the other two.
# Let's fit another model with just the lengths.

# Lengths-only model: Weight regressed on Length1, Length2 and Length3,
# fitted on the training split.
model1 <- lm(Weight ~ Length1 + Length2 + Length3, data = training)
summary(model1)
## 
## Call:
## lm(formula = Weight ~ Length1 + Length2 + Length3, data = training)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -292.62  -65.22  -32.07   68.88  373.70 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -443.84      37.09 -11.966  < 2e-16 ***
## Length1       -43.88      41.71  -1.052  0.29510    
## Length2        31.12      45.16   0.689  0.49225    
## Length3        34.98      10.42   3.358  0.00107 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 126.4 on 112 degrees of freedom
## Multiple R-squared:  0.825,  Adjusted R-squared:  0.8203 
## F-statistic:   176 on 3 and 112 DF,  p-value: < 2.2e-16
# Variance inflation factor of every predictor in the lengths-only model
faraway::vif(model1)
##    Length1    Length2    Length3 
##  926.11827 1254.71693   81.76322
# Again we found that the Length3 has a lower value.

# Reduced model: Weight regressed on Length3, Width and Height only
# (Length1 and Length2 dropped), fitted on the training split.
model2 <- lm(Weight ~ Length3 + Width + Height, data = training)
summary(model2)
## 
## Call:
## lm(formula = Weight ~ Length3 + Width + Height, data = training)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -165.38  -68.86  -37.45   45.60  289.00 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -432.082     28.577 -15.120  < 2e-16 ***
## Length3       10.316      2.080   4.959 2.54e-06 ***
## Width         78.398     14.192   5.524 2.18e-07 ***
## Height        15.364      3.573   4.301 3.65e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 98.25 on 112 degrees of freedom
## Multiple R-squared:  0.8942, Adjusted R-squared:  0.8914 
## F-statistic: 315.6 on 3 and 112 DF,  p-value: < 2.2e-16
# Variance inflation factor of every predictor in the reduced model
faraway::vif(model2)
##  Length3    Width   Height 
## 5.395211 6.030877 2.968547
# In model2 all the VIF values are less than 10, so we are going to take those variables for further analysis.

# Scatter plots of Weight against each predictor kept in model2
# (one plot per term on the right-hand side of the formula).
plot(Weight ~ Length3 + Width + Height, training)

# abline() can only draw a line from an intercept and ONE slope; given the
# four-coefficient model2 it would use just the first two coefficients and
# draw them on whichever scatter plot happened to be drawn last. Show the
# fit of the whole model instead: observed vs. fitted weight, with the
# identity line (perfect prediction) as reference.
plot(
  fitted(model2), training$Weight,
  xlab = "Fitted weight", ylab = "Observed weight",
  main = "model2: observed vs. fitted"
)
abline(a = 0, b = 1, col = "blue")

# Diagnostic plots for model2 -- the model retained for prediction -- on a
# single 2 x 2 page; the previous panel layout is restored afterwards.
old_par <- par(mfrow = c(2, 2))
plot(model2)
par(old_par)

# Predict Weight for the held-out testing rows with the reduced model.
# The result is a numeric vector named by the row names of `testing`.
pred <- predict(model2, newdata = testing)
head(pred)
##        5       14       16       26       28       29 
## 512.3641 564.1789 626.3428 715.7772 708.5664 742.5190
# First six testing rows, for comparison with the predictions above
head(testing, n = 6L)
##    Species Weight Length1 Length2 Length3  Height  Width
## 5    Bream    430    26.5      29    34.0 12.4440 5.1340
## 14   Bream    340    29.5      32    37.3 13.9129 5.0728
## 16   Bream    600    29.4      32    37.2 15.4380 5.5800
## 26   Bream    725    31.8      35    40.9 16.3600 6.0532
## 28   Bream    714    32.7      36    41.5 16.5170 5.8515
## 29   Bream    850    32.8      36    41.6 16.8896 6.1984
# Last six predictions
tail(pred, n = 6L)
##       140       142       147       149       154       156 
##  635.5827  895.5962 -172.0217 -162.2747 -156.2789  -72.5796
# Last six testing rows, for comparison with the predictions above
tail(testing, n = 6L)
##     Species Weight Length1 Length2 Length3  Height  Width
## 140    Pike  770.0    44.8    48.0    51.2  7.6800 5.3760
## 142    Pike 1250.0    52.0    56.0    59.7 10.6863 6.9849
## 147   Smelt    9.8    10.7    11.2    12.4  2.0832 1.2772
## 149   Smelt   10.0    11.3    11.8    13.1  2.2139 1.2838
## 154   Smelt   12.2    12.1    13.0    13.8  2.2770 1.2558
## 156   Smelt   19.9    13.8    15.0    16.2  2.9322 1.8792

We can see that the predicted values and the actual values differ a lot, and in some cases we get negative weights, which is impossible. All the negative values occur only for the Smelt species. For the other species the difference between the predicted and actual values is smaller. So, after eliminating the Smelt species, we can predict the weight much more accurately.

Visualization of Species Characteristics

# Assign each fish species a color (7 species, 7 colors)
# NOTE(review): unique(fish_df) returns the distinct ROWS of the whole data
# frame (hence the full listing below), not the species names;
# unique(fish_df$Species) would list only the species that COLORS is keyed on.
unique(fish_df)
##       Species Weight Length1 Length2 Length3  Height  Width
## 1       Bream  242.0    23.2    25.4    30.0 11.5200 4.0200
## 2       Bream  290.0    24.0    26.3    31.2 12.4800 4.3056
## 3       Bream  340.0    23.9    26.5    31.1 12.3778 4.6961
## 4       Bream  363.0    26.3    29.0    33.5 12.7300 4.4555
## 5       Bream  430.0    26.5    29.0    34.0 12.4440 5.1340
## 6       Bream  450.0    26.8    29.7    34.7 13.6024 4.9274
## 7       Bream  500.0    26.8    29.7    34.5 14.1795 5.2785
## 8       Bream  390.0    27.6    30.0    35.0 12.6700 4.6900
## 9       Bream  450.0    27.6    30.0    35.1 14.0049 4.8438
## 10      Bream  500.0    28.5    30.7    36.2 14.2266 4.9594
## 11      Bream  475.0    28.4    31.0    36.2 14.2628 5.1042
## 12      Bream  500.0    28.7    31.0    36.2 14.3714 4.8146
## 13      Bream  500.0    29.1    31.5    36.4 13.7592 4.3680
## 14      Bream  340.0    29.5    32.0    37.3 13.9129 5.0728
## 15      Bream  600.0    29.4    32.0    37.2 14.9544 5.1708
## 16      Bream  600.0    29.4    32.0    37.2 15.4380 5.5800
## 17      Bream  700.0    30.4    33.0    38.3 14.8604 5.2854
## 18      Bream  700.0    30.4    33.0    38.5 14.9380 5.1975
## 19      Bream  610.0    30.9    33.5    38.6 15.6330 5.1338
## 20      Bream  650.0    31.0    33.5    38.7 14.4738 5.7276
## 21      Bream  575.0    31.3    34.0    39.5 15.1285 5.5695
## 22      Bream  685.0    31.4    34.0    39.2 15.9936 5.3704
## 23      Bream  620.0    31.5    34.5    39.7 15.5227 5.2801
## 24      Bream  680.0    31.8    35.0    40.6 15.4686 6.1306
## 25      Bream  700.0    31.9    35.0    40.5 16.2405 5.5890
## 26      Bream  725.0    31.8    35.0    40.9 16.3600 6.0532
## 27      Bream  720.0    32.0    35.0    40.6 16.3618 6.0900
## 28      Bream  714.0    32.7    36.0    41.5 16.5170 5.8515
## 29      Bream  850.0    32.8    36.0    41.6 16.8896 6.1984
## 30      Bream 1000.0    33.5    37.0    42.6 18.9570 6.6030
## 31      Bream  920.0    35.0    38.5    44.1 18.0369 6.3063
## 32      Bream  955.0    35.0    38.5    44.0 18.0840 6.2920
## 33      Bream  925.0    36.2    39.5    45.3 18.7542 6.7497
## 34      Bream  975.0    37.4    41.0    45.9 18.6354 6.7473
## 35      Bream  950.0    38.0    41.0    46.5 17.6235 6.3705
## 36      Roach   40.0    12.9    14.1    16.2  4.1472 2.2680
## 37      Roach   69.0    16.5    18.2    20.3  5.2983 2.8217
## 38      Roach   78.0    17.5    18.8    21.2  5.5756 2.9044
## 39      Roach   87.0    18.2    19.8    22.2  5.6166 3.1746
## 40      Roach  120.0    18.6    20.0    22.2  6.2160 3.5742
## 41      Roach    0.0    19.0    20.5    22.8  6.4752 3.3516
## 42      Roach  110.0    19.1    20.8    23.1  6.1677 3.3957
## 43      Roach  120.0    19.4    21.0    23.7  6.1146 3.2943
## 44      Roach  150.0    20.4    22.0    24.7  5.8045 3.7544
## 45      Roach  145.0    20.5    22.0    24.3  6.6339 3.5478
## 46      Roach  160.0    20.5    22.5    25.3  7.0334 3.8203
## 47      Roach  140.0    21.0    22.5    25.0  6.5500 3.3250
## 48      Roach  160.0    21.1    22.5    25.0  6.4000 3.8000
## 49      Roach  169.0    22.0    24.0    27.2  7.5344 3.8352
## 50      Roach  161.0    22.0    23.4    26.7  6.9153 3.6312
## 51      Roach  200.0    22.1    23.5    26.8  7.3968 4.1272
## 52      Roach  180.0    23.6    25.2    27.9  7.0866 3.9060
## 53      Roach  290.0    24.0    26.0    29.2  8.8768 4.4968
## 54      Roach  272.0    25.0    27.0    30.6  8.5680 4.7736
## 55      Roach  390.0    29.5    31.7    35.0  9.4850 5.3550
## 56  Whitefish  270.0    23.6    26.0    28.7  8.3804 4.2476
## 57  Whitefish  270.0    24.1    26.5    29.3  8.1454 4.2485
## 58  Whitefish  306.0    25.6    28.0    30.8  8.7780 4.6816
## 59  Whitefish  540.0    28.5    31.0    34.0 10.7440 6.5620
## 60  Whitefish  800.0    33.7    36.4    39.6 11.7612 6.5736
## 61  Whitefish 1000.0    37.3    40.0    43.5 12.3540 6.5250
## 62     Parkki   55.0    13.5    14.7    16.5  6.8475 2.3265
## 63     Parkki   60.0    14.3    15.5    17.4  6.5772 2.3142
## 64     Parkki   90.0    16.3    17.7    19.8  7.4052 2.6730
## 65     Parkki  120.0    17.5    19.0    21.3  8.3922 2.9181
## 66     Parkki  150.0    18.4    20.0    22.4  8.8928 3.2928
## 67     Parkki  140.0    19.0    20.7    23.2  8.5376 3.2944
## 68     Parkki  170.0    19.0    20.7    23.2  9.3960 3.4104
## 69     Parkki  145.0    19.8    21.5    24.1  9.7364 3.1571
## 70     Parkki  200.0    21.2    23.0    25.8 10.3458 3.6636
## 71     Parkki  273.0    23.0    25.0    28.0 11.0880 4.1440
## 72     Parkki  300.0    24.0    26.0    29.0 11.3680 4.2340
## 73      Perch    5.9     7.5     8.4     8.8  2.1120 1.4080
## 74      Perch   32.0    12.5    13.7    14.7  3.5280 1.9992
## 75      Perch   40.0    13.8    15.0    16.0  3.8240 2.4320
## 76      Perch   51.5    15.0    16.2    17.2  4.5924 2.6316
## 77      Perch   70.0    15.7    17.4    18.5  4.5880 2.9415
## 78      Perch  100.0    16.2    18.0    19.2  5.2224 3.3216
## 79      Perch   78.0    16.8    18.7    19.4  5.1992 3.1234
## 80      Perch   80.0    17.2    19.0    20.2  5.6358 3.0502
## 81      Perch   85.0    17.8    19.6    20.8  5.1376 3.0368
## 82      Perch   85.0    18.2    20.0    21.0  5.0820 2.7720
## 83      Perch  110.0    19.0    21.0    22.5  5.6925 3.5550
## 84      Perch  115.0    19.0    21.0    22.5  5.9175 3.3075
## 85      Perch  125.0    19.0    21.0    22.5  5.6925 3.6675
## 86      Perch  130.0    19.3    21.3    22.8  6.3840 3.5340
## 87      Perch  120.0    20.0    22.0    23.5  6.1100 3.4075
## 88      Perch  120.0    20.0    22.0    23.5  5.6400 3.5250
## 89      Perch  130.0    20.0    22.0    23.5  6.1100 3.5250
## 90      Perch  135.0    20.0    22.0    23.5  5.8750 3.5250
## 91      Perch  110.0    20.0    22.0    23.5  5.5225 3.9950
## 92      Perch  130.0    20.5    22.5    24.0  5.8560 3.6240
## 93      Perch  150.0    20.5    22.5    24.0  6.7920 3.6240
## 94      Perch  145.0    20.7    22.7    24.2  5.9532 3.6300
## 95      Perch  150.0    21.0    23.0    24.5  5.2185 3.6260
## 96      Perch  170.0    21.5    23.5    25.0  6.2750 3.7250
## 97      Perch  225.0    22.0    24.0    25.5  7.2930 3.7230
## 98      Perch  145.0    22.0    24.0    25.5  6.3750 3.8250
## 99      Perch  188.0    22.6    24.6    26.2  6.7334 4.1658
## 100     Perch  180.0    23.0    25.0    26.5  6.4395 3.6835
## 101     Perch  197.0    23.5    25.6    27.0  6.5610 4.2390
## 102     Perch  218.0    25.0    26.5    28.0  7.1680 4.1440
## 103     Perch  300.0    25.2    27.3    28.7  8.3230 5.1373
## 104     Perch  260.0    25.4    27.5    28.9  7.1672 4.3350
## 105     Perch  265.0    25.4    27.5    28.9  7.0516 4.3350
## 106     Perch  250.0    25.4    27.5    28.9  7.2828 4.5662
## 107     Perch  250.0    25.9    28.0    29.4  7.8204 4.2042
## 108     Perch  300.0    26.9    28.7    30.1  7.5852 4.6354
## 109     Perch  320.0    27.8    30.0    31.6  7.6156 4.7716
## 110     Perch  514.0    30.5    32.8    34.0 10.0300 6.0180
## 111     Perch  556.0    32.0    34.5    36.5 10.2565 6.3875
## 112     Perch  840.0    32.5    35.0    37.3 11.4884 7.7957
## 113     Perch  685.0    34.0    36.5    39.0 10.8810 6.8640
## 114     Perch  700.0    34.0    36.0    38.3 10.6091 6.7408
## 115     Perch  700.0    34.5    37.0    39.4 10.8350 6.2646
## 116     Perch  690.0    34.6    37.0    39.3 10.5717 6.3666
## 117     Perch  900.0    36.5    39.0    41.4 11.1366 7.4934
## 118     Perch  650.0    36.5    39.0    41.4 11.1366 6.0030
## 119     Perch  820.0    36.6    39.0    41.3 12.4313 7.3514
## 120     Perch  850.0    36.9    40.0    42.3 11.9286 7.1064
## 121     Perch  900.0    37.0    40.0    42.5 11.7300 7.2250
## 122     Perch 1015.0    37.0    40.0    42.4 12.3808 7.4624
## 123     Perch  820.0    37.1    40.0    42.5 11.1350 6.6300
## 124     Perch 1100.0    39.0    42.0    44.6 12.8002 6.8684
## 125     Perch 1000.0    39.8    43.0    45.2 11.9328 7.2772
## 126     Perch 1100.0    40.1    43.0    45.5 12.5125 7.4165
## 127     Perch 1000.0    40.2    43.5    46.0 12.6040 8.1420
## 128     Perch 1000.0    41.1    44.0    46.6 12.4888 7.5958
## 129      Pike  200.0    30.0    32.3    34.8  5.5680 3.3756
## 130      Pike  300.0    31.7    34.0    37.8  5.7078 4.1580
## 131      Pike  300.0    32.7    35.0    38.8  5.9364 4.3844
## 132      Pike  300.0    34.8    37.3    39.8  6.2884 4.0198
## 133      Pike  430.0    35.5    38.0    40.5  7.2900 4.5765
## 134      Pike  345.0    36.0    38.5    41.0  6.3960 3.9770
## 135      Pike  456.0    40.0    42.5    45.5  7.2800 4.3225
## 136      Pike  510.0    40.0    42.5    45.5  6.8250 4.4590
## 137      Pike  540.0    40.1    43.0    45.8  7.7860 5.1296
## 138      Pike  500.0    42.0    45.0    48.0  6.9600 4.8960
## 139      Pike  567.0    43.2    46.0    48.7  7.7920 4.8700
## 140      Pike  770.0    44.8    48.0    51.2  7.6800 5.3760
## 141      Pike  950.0    48.3    51.7    55.1  8.9262 6.1712
## 142      Pike 1250.0    52.0    56.0    59.7 10.6863 6.9849
## 143     Smelt    6.7     9.3     9.8    10.8  1.7388 1.0476
## 144     Smelt    7.5    10.0    10.5    11.6  1.9720 1.1600
## 145     Smelt    7.0    10.1    10.6    11.6  1.7284 1.1484
## 146     Smelt    9.7    10.4    11.0    12.0  2.1960 1.3800
## 147     Smelt    9.8    10.7    11.2    12.4  2.0832 1.2772
## 148     Smelt    8.7    10.8    11.3    12.6  1.9782 1.2852
## 149     Smelt   10.0    11.3    11.8    13.1  2.2139 1.2838
## 150     Smelt    9.9    11.3    11.8    13.1  2.2139 1.1659
## 151     Smelt    9.8    11.4    12.0    13.2  2.2044 1.1484
## 152     Smelt   12.2    11.5    12.2    13.4  2.0904 1.3936
## 153     Smelt   13.4    11.7    12.4    13.5  2.4300 1.2690
## 154     Smelt   12.2    12.1    13.0    13.8  2.2770 1.2558
## 155     Smelt   19.7    13.2    14.3    15.2  2.8728 2.0672
## 156     Smelt   19.9    13.8    15.0    16.2  2.9322 1.8792
# Fill colour for each species; the names must match the values of
# fish_df$Species because scale_fill_manual() looks colours up by name.
COLORS <- c(
  "Bream" = "red",
  "Roach" = "coral",
  "Whitefish" = "gold",
  "Parkki" = "darkolivegreen3",
  "Perch" = "slategray1",
  "Pike" = "purple",
  "Smelt" = "slateblue1"
)

# Using the fct_reorder library(forcats) to order the boxplots
# Using ggpubr to add the statistical testing at the top of each chart

library(forcats)
library(ggpubr)

# Build one boxplot per species for a single numeric column.
#
#   data       data frame holding a Species column and the column `value_col`
#   value_col  name (character string) of the numeric column shown on the y axis
#   title      plot title
#   y_label    label of the y axis
#
# Species are placed along the x axis in the order given by
# fct_reorder(Species, <value_col>, .desc = TRUE); boxes are filled from COLORS,
# the legend is hidden, and stat_compare_means() annotates the plot with a
# Kruskal-Wallis comparison across species.
# Returns the ggplot object (printed automatically when called at top level).
plot_species_boxplot <- function(data, value_col, title, y_label) {
  ggplot(
    data,
    aes(
      x = fct_reorder(Species, .data[[value_col]], .desc = TRUE),
      y = .data[[value_col]]
    )
  ) +
    geom_boxplot(aes(fill = Species)) +
    theme(legend.position = "none") +
    ggtitle(title) +
    ylab(y_label) + xlab("Species") +
    scale_fill_manual(values = COLORS) +
    stat_compare_means(method = "kruskal.test")
}

### BOXPLOT of SPECIES and WEIGHT
plot_species_boxplot(fish_df, "Weight", "Species and Weight", "Weight (g)")

### BOXPLOT of SPECIES and VERTICAL LENGTH
plot_species_boxplot(
  fish_df, "Length1", "Species and Vertical Length", "Vertical Length (cm)"
)

### BOXPLOT of SPECIES and CROSS LENGTH
plot_species_boxplot(
  fish_df, "Length3", "Species and Cross Length", "Cross Length (cm)"
)

### BOXPLOT of SPECIES and HEIGHT
plot_species_boxplot(fish_df, "Height", "Species and Height", "Height (cm)")

### BOXPLOT of SPECIES and DIAGONAL WIDTH
plot_species_boxplot(
  fish_df, "Width", "Species and Diagonal Width", "Width (cm)"
)