Dataset #3 covers 25 years' worth of catfish stock data (1992–2016).
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
# Read the raw catfish table. header = FALSE means the file's first row (the
# "Size category" / year labels) is read as ordinary data; it is dropped in a
# later step. stringsAsFactors = FALSE is set explicitly so the comma-formatted
# counts stay plain character strings on every R version instead of becoming
# factors (the default before R 4.0), whose differing levels make gather()
# warn that attributes are not identical across measure variables.
data3 <- read.csv("CatfishFarm.csv", header = FALSE, stringsAsFactors = FALSE)
# Preview the first rows to confirm the layout.
head(data3)
## V1 V2 V3 V4 V5 V6 V7
## 1 Size category 1992 1993 1994 1995 1996 1997
## 2 Broodfish 1/ 1,491 1,169 1,183 1,301 1,171 1,163
## 3 Fingerling/fry 2/ 849,412 669,491 648,628 724,693 823,397 873,457
## 4 Stockers 3/ 634,353 571,254 548,207 554,342 627,834 754,816
## 5 Small foodsize 4/ 166,731 153,600 134,314 138,160 156,297 178,448
## 6 Medium foodsize 5/ 70,495 61,894 48,851 59,159 64,858 84,725
## V8 V9 V10 V11 V12 V13 V14 V15
## 1 1998 1999 2000 2001 2002 2003 2004 2005
## 2 1,187 1,155 1,377 1,327 1,171 1,303 1,113 1,053
## 3 975,542 986,368 1,053,300 1,023,533 1,066,400 990,163 745,849 712,144
## 4 607,878 678,682 790,683 845,287 676,378 775,226 890,275 660,000
## 5 178,511 182,251 200,032 239,655 287,591 254,920 261,323 243,090
## 6 62,140 63,049 77,149 87,926 106,117 127,908 109,120 95,240
## V16 V17 V18 V19 V20 V21 V22 V23
## 1 2006 2007 2008 2009 2010 2011 2012 2013
## 2 1,091 886 801 704 536 495 562 540
## 3 1,045,266 985,620 951,910 728,340 429,590 568,990 451,100 398,510
## 4 781,958 586,320 688,844 586,069 366,090 380,660 463,485 339,260
## 5 214,848 210,340 204,750 193,870 169,030 115,560 112,970 103,520
## 6 103,591 104,080 107,800 105,610 91,790 54,130 64,740 58,015
## V24 V25 V26
## 1 2014 2015 2016
## 2 650 577 520
## 3 420,060 449,510 328,570
## 4 289,080 248,790 204,800
## 5 102,190 96,810 100,850
## 6 50,600 48,220 45,775
Drop the first row, then tidy the data by renaming the columns and gathering the year columns into long format. There was an issue with the column names in this data set, and I felt this was the best way to handle it.
# The file was read without a header, so row 1 holds the real column labels
# ("Size category", 1992, 1993, ...). Capture the year labels from that row
# before dropping it, instead of hard-coding one rename per year; this keeps
# working if the file gains or loses a year column.
year_labels <- trimws(unname(vapply(data3[1, -1], as.character, character(1))))
data3 <- data3[-1, ]

# tidy up: rename and gather
catfish <- data3
names(catfish) <- c("Category", year_labels)
# Make every year column plain character before gathering. If the columns were
# read as factors, their differing levels make gather() warn that attributes
# are not identical and will be dropped; the resulting Stock column is
# character either way.
catfish[-1] <- lapply(catfish[-1], as.character)
# Long format: one row per (Category, Year) pair with its Stock value.
catfish <- catfish %>%
  gather(-Category, key = "Year", value = "Stock")
## Warning: attributes are not identical across measure variables;
## they will be dropped
# Preview the first six rows of the long-format table.
head(catfish, n = 6L)
## Category Year Stock
## 1 Broodfish 1/ 1992 1,491
## 2 Fingerling/fry 2/ 1992 849,412
## 3 Stockers 3/ 1992 634,353
## 4 Small foodsize 4/ 1992 166,731
## 5 Medium foodsize 5/ 1992 70,495
## 6 Large foodsize 6/ 1992 6,769
Remove the commas from Stock so the values can be summed, then compare stock by size category.
# Strip the trailing footnote marker (e.g. " 1/") from each category label.
# Deleting only the "/" would leave the footnote digit behind ("Broodfish 1")
# and would also remove the legitimate slash in "Fingerling/fry".
catfish$Category <- sub(
  "[[:space:]]*[0-9]+/[[:space:]]*$", "", catfish$Category
)
# Stock values carry thousands separators ("1,491"); remove them so the column
# can be converted to numeric and summed.
catfish$Stock <- as.numeric(gsub(",", "", catfish$Stock))
# Total stock per size category, summed over all years. summarise() keeps only
# the grouping column and the new total, so no prior select() is needed.
Stock.by.Size <- catfish %>%
  group_by(Category) %>%
  summarise(Stock.by.Category = sum(Stock))
head(Stock.by.Size)
## # A tibble: 6 x 2
## Category Stock.by.Category
## <chr> <dbl>
## 1 Broodfish 1 24526
## 2 Fingerlingfry 2 18899843
## 3 Large foodsize 6 185485
## 4 Medium foodsize 5 1952982
## 5 Small foodsize 4 4399661
## 6 Stockers 3 14550571
Compare stock by year.
# Total stock per year, summed across all size categories.
Stock.by.year <- summarise(
  group_by(select(catfish, Year, Stock), Year),
  Stock = sum(Stock)
)
# Show the first six yearly totals.
head(Stock.by.year, n = 6L)
## # A tibble: 6 x 2
## Year Stock
## <chr> <dbl>
## 1 1992 1729251
## 2 1993 1464106
## 3 1994 1386379
## 4 1995 1482191
## 5 1996 1680201
## 6 1997 1900419