Introduction

This is an exploratory data analysis of fishing-vessel port-landings data for the UK, covering 2008 to 2015.

The dataset is taken from Kaggle: https://www.kaggle.com/theflyingmunkey/uk-fleet-landings

# Load the UK fleet landings CSV into `df`.
# NOTE(review): the path is specific to one machine's home directory.
df <- read_csv(
  file = "~/Documents/Rstudio/datasets/UK_fleet_landings.csv"
)
## Observations: 672,256
## Variables: 14
## $ year               <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2...
## $ month              <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ port_of_landing    <chr> "Aberdaran", "Aberdaran", "Aberdaran", "Abe...
## $ port_nationality   <chr> "UK - Wales", "UK - Wales", "UK - Wales", "...
## $ vessel_nationality <chr> "UK - Wales", "UK - Wales", "UK - Wales", "...
## $ length_group       <chr> "10m and under", "10m and under", "10m and ...
## $ gear_category      <chr> "Pots and traps", "Pots and traps", "Pots a...
## $ species_code       <chr> "BSS", "COD", "CRE", "LBE", "CRE", "ANF", "...
## $ species_name       <chr> "Bass", "Cod", "Crabs (C.P.Mixed Sexes)", "...
## $ species            <chr> "Bass", "Cod", "Crabs", "Lobsters", "Crabs"...
## $ species_group      <chr> "Demersal", "Demersal", "Shellfish", "Shell...
## $ live_weight        <dbl> 0.005, 0.001, 0.028, 0.020, 0.958, 0.060, 7...
## $ landed_weight      <dbl> 0.005, 0.001, 0.028, 0.020, 0.958, 0.020, 5...
## $ value_gbp          <dbl> 27.60, 3.30, 56.00, 88.10, 1279.80, 124.00,...

Data manipulation

##       year          month             port_of_landing  
##  Min.   :2008   Min.   : 1.000   Newlyn       : 24020  
##  1st Qu.:2010   1st Qu.: 4.000   Brixham      : 22924  
##  Median :2011   Median : 7.000   Plymouth     : 21639  
##  Mean   :2012   Mean   : 6.583   Milford Haven: 15026  
##  3rd Qu.:2013   3rd Qu.: 9.000   Newhaven     : 13557  
##  Max.   :2015   Max.   :12.000   Mevagissey   : 13150  
##                                  (Other)      :561923  
##               port_nationality              vessel_nationality
##  UK - England         :411828   UK - England         :423117  
##  UK - Scotland        :146636   UK - Scotland        :151769  
##  UK - Wales           : 39536   UK - Northern Ireland: 26962  
##  UK - Northern Ireland: 19150   UK - Wales           : 26718  
##  Netherlands          : 17976   Belgium              : 18153  
##  France               : 10972   France               :  7019  
##  (Other)              : 26141   (Other)              : 18501  
##         length_group                 gear_category     species_code   
##  10m and under:359839   Demersal trawl/seine:278925   -      : 68767  
##  Over 10m     :312400   Drift and fixed nets:138609   LBE    : 41035  
##                         Pots and traps      :133957   CRE    : 37486  
##                         Beam trawl          : 47973   COD    : 21854  
##                         Gears using hooks   : 35350   NEP    : 20598  
##                         Dredge              : 31296   SOL    : 19828  
##                         (Other)             :  6129   (Other):462671  
##                     species_name               species      
##  -                        : 68767   Crabs          : 70663  
##  Lobsters                 : 41035   Other Demersal : 61451  
##  Crabs (C.P.Mixed Sexes)  : 37486   Skates and Rays: 48156  
##  Cod                      : 21854   Lobsters       : 47016  
##  Nephrops (Norway Lobster): 20598   Sole           : 25181  
##  Sole                     : 19828   Cod            : 24988  
##  (Other)                  :462671   (Other)        :394784  
##    species_group     live_weight       landed_weight     
##  Demersal :435483   Min.   :   -0.02   Min.   :   -0.02  
##  Pelagic  : 26833   1st Qu.:    0.01   1st Qu.:    0.01  
##  Shellfish:209923   Median :    0.10   Median :    0.09  
##                     Mean   :    8.61   Mean   :    8.07  
##                     3rd Qu.:    0.76   3rd Qu.:    0.69  
##                     Max.   :32942.74   Max.   :32942.74  
##                                                          
##    value_gbp       
##  Min.   :   -1979  
##  1st Qu.:      24  
##  Median :     197  
##  Mean   :    9999  
##  3rd Qu.:    1688  
##  Max.   :28941257  
## 
# Share of landing records per category, one bar chart per categorical column.
#
# Changes relative to the earlier version of these four plots:
# * geom_bar() replaces geom_histogram(stat = "count", binwidth = 1): the
#   variables are discrete, and ggplot2 ignores `binwidth` under
#   stat = "count".
# * The share is multiplied by 100 so the axis really is in percent, matching
#   its "Percent" label (it used to show a 0-1 proportion).
# * Rotated tick labels use hjust = 1 / vjust = 0.5 so each label ends at the
#   axis and is centred on its tick.
# NOTE(review): `..count..` is kept for compatibility with older ggplot2
# releases; on ggplot2 >= 3.3.0 prefer after_stat(count).

# Port nationality.
ggplot(df, aes(x = port_nationality, fill = port_nationality)) +
  geom_bar(aes(y = 100 * ..count.. / sum(..count..))) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  ) +
  labs(y = "Percent", title = "Percentage of Port Nationalities")

# Gear category.
ggplot(df, aes(x = gear_category, fill = gear_category)) +
  geom_bar(aes(y = 100 * ..count.. / sum(..count..))) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  ) +
  labs(y = "Percent", title = "Percentage of Gear Categories")

# Species group.
ggplot(df, aes(x = species_group, fill = species_group)) +
  geom_bar(aes(y = 100 * ..count.. / sum(..count..))) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  ) +
  labs(y = "Percent", title = "Percentage of Species Group")

# Vessel length group.
ggplot(df, aes(x = length_group, fill = length_group)) +
  geom_bar(aes(y = 100 * ..count.. / sum(..count..))) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  ) +
  labs(y = "Percent", title = "Percentage of Length Group")

# Faceted bar charts of `Total` from pre-aggregated tables.
# by_gear, by_port_nationality, by_year, by_length, by_month and mytheme()
# are defined outside this excerpt.
# NOTE(review): `Total` is presumably a per-group aggregate -- confirm against
# the code that builds these tables.
#
# Facets are given as bare column names (~species_group) rather than
# ~by_gear$species_group. The `$` form bypasses the layer data and looks the
# column up through a global variable, so it silently breaks as soon as the
# plotted data is filtered, reordered or renamed.

# Totals per gear category, one panel per species group; bars sorted by
# descending Total.
ggplot(by_gear, aes(x = reorder(gear_category, -Total), y = Total)) +
  geom_bar(fill = "maroon", stat = "identity") +
  mytheme() +
  ggtitle("Gear Category and Species group") +
  facet_wrap(~species_group)

# Totals per port nationality, one panel per species group.
ggplot(
  by_port_nationality,
  aes(x = reorder(port_nationality, -Total), y = Total)
) +
  geom_bar(fill = "red", stat = "identity") +
  mytheme() +
  ggtitle("Port nationality and Species group") +
  facet_wrap(~species_group)

# Totals per species group, one panel per year.
ggplot(by_year, aes(x = species_group, y = Total)) +
  geom_bar(fill = "blue", stat = "identity") +
  mytheme() +
  ggtitle("By year and Species Group") +
  facet_wrap(~year)

# Totals per port nationality, one panel per vessel length group; flipped so
# the nationality names read horizontally.
ggplot(by_length, aes(x = port_nationality, y = Total)) +
  geom_bar(fill = "blue", stat = "identity") +
  mytheme() +
  ggtitle("By Port nationality and Ship Length") +
  facet_wrap(~length_group) +
  coord_flip()

# Species-group composition per month (position = "fill" normalises each
# month's stack to 1), one panel per year.
ggplot(by_month, aes(x = month, y = Total, fill = species_group)) +
  geom_bar(position = "fill", stat = "identity") +
  mytheme() +
  ggtitle("By Month, Year and Species Group") +
  facet_wrap(~year)

# Live weight, value (GBP) and landed weight compared across species groups,
# shown together in one grid.
# by_live_weight, by_value_gbp, by_landed_weight and mytheme() are defined
# outside this excerpt.
#
# Changes relative to the earlier version:
# * position = "fill" was dropped. With x and fill both mapped to
#   species_group, every bar holds a single fill group, so "fill" rescaled
#   every bar to height 1 and the plots showed nothing. The default stacking
#   keeps the actual magnitudes.
# * The landed-weight plot is now stored as `a3` and passed to grid.arrange().
#   It used to be printed on its own while the 2x2 grid was given only two
#   plots.

a1 <- ggplot(
  by_live_weight,
  aes(x = species_group, y = live_weight, fill = species_group)
) +
  geom_bar(stat = "identity") +
  mytheme() +
  ggtitle("Live weight and Species Group")

a2 <- ggplot(
  by_value_gbp,
  aes(x = species_group, y = value_gbp, fill = species_group)
) +
  geom_bar(stat = "identity") +
  mytheme() +
  ggtitle("Value GBP and Species Group")

a3 <- ggplot(
  by_landed_weight,
  aes(x = species_group, y = landed_weight, fill = species_group)
) +
  geom_bar(stat = "identity") +
  mytheme() +
  ggtitle("Landed Weight and Species Group")

# Three plots in a 2x2 layout; the fourth cell stays empty.
grid.arrange(a1, a2, a3, nrow = 2, ncol = 2)

# Faceted bar charts of `Total` by vessel nationality and by landing port.
# by_vessel_nationality, port_landing and mytheme() are defined outside this
# excerpt.
# NOTE(review): both vessel-nationality plots read the same table; if it is
# aggregated by gear category AND species group, each panel stacks several
# rows per bar -- confirm against the code that builds it.
#
# Facets are given as bare column names instead of ~table$column, so the
# facet variable comes from the plotted data rather than from a global
# variable that can drift out of sync with it.

# Totals per vessel nationality, one panel per gear category.
ggplot(
  by_vessel_nationality,
  aes(x = reorder(vessel_nationality, -Total), y = Total)
) +
  geom_bar(fill = "red", stat = "identity") +
  mytheme() +
  labs(
    title = "Vessel Nationalities and Gear Categories",
    x = "Nationality of Vessels"
  ) +
  facet_wrap(~gear_category)

# Totals per vessel nationality, one panel per species group.
ggplot(
  by_vessel_nationality,
  aes(x = reorder(vessel_nationality, -Total), y = Total)
) +
  geom_bar(fill = "red", stat = "identity") +
  mytheme() +
  labs(
    title = "Vessel Nationalities and Species Group",
    x = "Nationality of Vessels"
  ) +
  facet_wrap(~species_group)

# Totals per landing port, one panel per gear category.
ggplot(port_landing, aes(x = reorder(port_of_landing, -Total), y = Total)) +
  geom_bar(fill = "red", stat = "identity") +
  mytheme() +
  labs(title = "Port Landings and Gear Categories", x = "Landing Ports") +
  facet_wrap(~gear_category)