This is an exploratory data analysis of UK fishing-vessel landings data, recorded at ports from 2008 to 2015.
The dataset is taken from https://www.kaggle.com/theflyingmunkey/uk-fleet-landings
# Read the UK fleet landings CSV into `df`, the data frame used by the
# plots below.
# NOTE(review): the path points into a user home directory; adjust it before
# running on another machine.
df <- read_csv(
  file = "~/Documents/Rstudio/datasets/UK_fleet_landings.csv"
)
## Observations: 672,256
## Variables: 14
## $ year <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2...
## $ month <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ port_of_landing <chr> "Aberdaran", "Aberdaran", "Aberdaran", "Abe...
## $ port_nationality <chr> "UK - Wales", "UK - Wales", "UK - Wales", "...
## $ vessel_nationality <chr> "UK - Wales", "UK - Wales", "UK - Wales", "...
## $ length_group <chr> "10m and under", "10m and under", "10m and ...
## $ gear_category <chr> "Pots and traps", "Pots and traps", "Pots a...
## $ species_code <chr> "BSS", "COD", "CRE", "LBE", "CRE", "ANF", "...
## $ species_name <chr> "Bass", "Cod", "Crabs (C.P.Mixed Sexes)", "...
## $ species <chr> "Bass", "Cod", "Crabs", "Lobsters", "Crabs"...
## $ species_group <chr> "Demersal", "Demersal", "Shellfish", "Shell...
## $ live_weight <dbl> 0.005, 0.001, 0.028, 0.020, 0.958, 0.060, 7...
## $ landed_weight <dbl> 0.005, 0.001, 0.028, 0.020, 0.958, 0.020, 5...
## $ value_gbp <dbl> 27.60, 3.30, 56.00, 88.10, 1279.80, 124.00,...
Data manipulation
## year month port_of_landing
## Min. :2008 Min. : 1.000 Newlyn : 24020
## 1st Qu.:2010 1st Qu.: 4.000 Brixham : 22924
## Median :2011 Median : 7.000 Plymouth : 21639
## Mean :2012 Mean : 6.583 Milford Haven: 15026
## 3rd Qu.:2013 3rd Qu.: 9.000 Newhaven : 13557
## Max. :2015 Max. :12.000 Mevagissey : 13150
## (Other) :561923
## port_nationality vessel_nationality
## UK - England :411828 UK - England :423117
## UK - Scotland :146636 UK - Scotland :151769
## UK - Wales : 39536 UK - Northern Ireland: 26962
## UK - Northern Ireland: 19150 UK - Wales : 26718
## Netherlands : 17976 Belgium : 18153
## France : 10972 France : 7019
## (Other) : 26141 (Other) : 18501
## length_group gear_category species_code
## 10m and under:359839 Demersal trawl/seine:278925 - : 68767
## Over 10m :312400 Drift and fixed nets:138609 LBE : 41035
## Pots and traps :133957 CRE : 37486
## Beam trawl : 47973 COD : 21854
## Gears using hooks : 35350 NEP : 20598
## Dredge : 31296 SOL : 19828
## (Other) : 6129 (Other):462671
## species_name species
## - : 68767 Crabs : 70663
## Lobsters : 41035 Other Demersal : 61451
## Crabs (C.P.Mixed Sexes) : 37486 Skates and Rays: 48156
## Cod : 21854 Lobsters : 47016
## Nephrops (Norway Lobster): 20598 Sole : 25181
## Sole : 19828 Cod : 24988
## (Other) :462671 (Other) :394784
## species_group live_weight landed_weight
## Demersal :435483 Min. : -0.02 Min. : -0.02
## Pelagic : 26833 1st Qu.: 0.01 1st Qu.: 0.01
## Shellfish:209923 Median : 0.10 Median : 0.09
## Mean : 8.61 Mean : 8.07
## 3rd Qu.: 0.76 3rd Qu.: 0.69
## Max. :32942.74 Max. :32942.74
##
## value_gbp
## Min. : -1979
## 1st Qu.: 24
## Median : 197
## Mean : 9999
## 3rd Qu.: 1688
## Max. :28941257
##
# Share of records per category for four categorical columns of `df`.
#
# Each chart is a bar chart of row counts, so geom_bar() (whose default stat
# is "count") replaces geom_histogram(stat = "count", binwidth = 1); binwidth
# has no meaning for a count of discrete values. after_stat() replaces the
# deprecated `..count..` notation. Counts are rescaled to 0-100 so the axis
# really shows the percentages that its "Percent" label and the titles
# promise (the previous expression produced 0-1 proportions).

# Port nationality.
ggplot(df, aes(x = port_nationality, fill = port_nationality)) +
  geom_bar(aes(y = after_stat(100 * count / sum(count)))) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0),
    legend.position = "none"
  ) +
  labs(y = "Percent", title = "Percentage of Port Nationalities")

# Gear category.
ggplot(df, aes(x = gear_category, fill = gear_category)) +
  geom_bar(aes(y = after_stat(100 * count / sum(count)))) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0),
    legend.position = "none"
  ) +
  labs(y = "Percent", title = "Percentage of Gear Categories")

# Species group.
ggplot(df, aes(x = species_group, fill = species_group)) +
  geom_bar(aes(y = after_stat(100 * count / sum(count)))) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0),
    legend.position = "none"
  ) +
  labs(y = "Percent", title = "Percentage of Species Group")

# Vessel length group.
ggplot(df, aes(x = length_group, fill = length_group)) +
  geom_bar(aes(y = after_stat(100 * count / sum(count)))) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0),
    legend.position = "none"
  ) +
  labs(y = "Percent", title = "Percentage of Length Group")
# Faceted bar charts of the `Total` column of several summary tables
# (by_gear, by_port_nationality, by_year, by_length, by_month), which are
# built outside this excerpt.
#
# Facet formulas name the column directly (e.g. `~species_group`) instead of
# `~by_gear$species_group`: the facet variable is then looked up in the plot
# data like every aesthetic, rather than in a global vector that silently
# goes out of step if the data is filtered, reordered or replaced.
# geom_col() is the direct equivalent of geom_bar(stat = "identity").

# Gear categories (largest Total first), one panel per species group.
ggplot(by_gear, aes(x = reorder(gear_category, -Total), y = Total)) +
  geom_col(fill = "maroon") +
  mytheme() +
  ggtitle("Gear Category and Species group") +
  facet_wrap(~species_group)

# Port nationalities (largest Total first), one panel per species group.
ggplot(
  by_port_nationality,
  aes(x = reorder(port_nationality, -Total), y = Total)
) +
  geom_col(fill = "red") +
  mytheme() +
  ggtitle("Port nationality and Species group") +
  facet_wrap(~species_group)

# Species groups, one panel per year.
ggplot(by_year, aes(x = species_group, y = Total)) +
  geom_col(fill = "blue") +
  mytheme() +
  ggtitle("By year and Species Group") +
  facet_wrap(~year)

# Port nationalities as horizontal bars, one panel per vessel length group.
ggplot(by_length, aes(x = port_nationality, y = Total)) +
  geom_col(fill = "blue") +
  mytheme() +
  ggtitle("By Port nationality and Ship Length") +
  facet_wrap(~length_group) +
  coord_flip()

# Species-group share of each month's Total, one panel per year.
# position = "fill" rescales every monthly stack to 1, so the bars show
# composition rather than absolute totals.
ggplot(by_month, aes(x = month, y = Total, fill = species_group)) +
  geom_col(position = "fill") +
  mytheme() +
  ggtitle("By Month, Year and Species Group") +
  facet_wrap(~year)
# Live weight, value (GBP) and landed weight per species group, arranged in
# one grid.
#
# The bars use the default stacking instead of position = "fill": with `fill`
# mapped to the same variable as `x`, every bar holds a single fill group, so
# "fill" rescaled each bar to height 1 and the charts could not show any
# difference between species groups.
a1 <- ggplot(
  by_live_weight,
  aes(x = species_group, y = live_weight, fill = species_group)
) +
  geom_col() +
  mytheme() +
  ggtitle("Live weight and Species Group")

a2 <- ggplot(
  by_value_gbp,
  aes(x = species_group, y = value_gbp, fill = species_group)
) +
  geom_col() +
  mytheme() +
  ggtitle("Value GBP and Species Group")

# NOTE(review): this plot was previously not assigned and therefore missing
# from the grid below, which reserved a 2 x 2 layout for only two plots. It
# is assumed the third panel was intended - confirm.
a3 <- ggplot(
  by_landed_weight,
  aes(x = species_group, y = landed_weight, fill = species_group)
) +
  geom_col() +
  mytheme() +
  ggtitle("Landed Weight and Species Group")

grid.arrange(a1, a2, a3, nrow = 2, ncol = 2)
# Faceted bar charts of `Total` by vessel nationality and by landing port.
# by_vessel_nationality and port_landing are built outside this excerpt.
#
# Facet formulas name the column directly (e.g. `~gear_category`) instead of
# `~by_vessel_nationality$gear_category`: the facet variable is then looked
# up in the plot data rather than in a global vector that can go out of step
# with it. geom_col() is the direct equivalent of geom_bar(stat = "identity").

# Vessel nationalities (largest Total first), one panel per gear category.
ggplot(
  by_vessel_nationality,
  aes(x = reorder(vessel_nationality, -Total), y = Total)
) +
  geom_col(fill = "red") +
  mytheme() +
  labs(
    title = "Vessel Nationalities and Gear Categories",
    x = "Nationality of Vessels"
  ) +
  facet_wrap(~gear_category)

# Vessel nationalities (largest Total first), one panel per species group.
ggplot(
  by_vessel_nationality,
  aes(x = reorder(vessel_nationality, -Total), y = Total)
) +
  geom_col(fill = "red") +
  mytheme() +
  labs(
    title = "Vessel Nationalities and Species Group",
    x = "Nationality of Vessels"
  ) +
  facet_wrap(~species_group)

# Landing ports (largest Total first), one panel per gear category.
ggplot(port_landing, aes(x = reorder(port_of_landing, -Total), y = Total)) +
  geom_col(fill = "red") +
  mytheme() +
  labs(title = "Port Landings and Gear Categories", x = "Landing Ports") +
  facet_wrap(~gear_category)