Import data:
#load packages
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
# Read the California wine production table from a local CSV file.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# where that file exists.
CA_wineproduction <- read.csv(
  file = "C:/Users/12055/Documents/Career/Skill building/Data/Excel/Kaggle California_Wine_Production_1980_2020.csv"
)
# Preview the data: list the column names.
names(CA_wineproduction)
## [1] "Year" "CommodityCode" "CropName"
## [4] "CountyCode" "County" "HarvestedAcres"
## [7] "Yield.Unit.Acre." "Production" "Price.Dollars.Unit."
## [10] "Unit" "Value.Dollars." "X"
## [13] "Reproduce.Yield"
# Show the first six rows (the default for head(), spelled out here).
head(CA_wineproduction, n = 6L)
## Year CommodityCode CropName CountyCode County HarvestedAcres
## 1 2020 216299 GRAPESWINE 1 Alameda 2530
## 2 2019 216299 GRAPESWINE 1 Alameda 3390
## 3 2018 216299 GRAPESWINE 1 Alameda 3390
## 4 2017 216299 GRAPESWINE 1 Alameda 2420
## 5 2016 216299 GRAPESWINE 1 Alameda 2630
## 6 2015 216299 GRAPESWINE 1 Alameda 2400
## Yield.Unit.Acre. Production Price.Dollars.Unit. Unit Value.Dollars. X
## 1 5.14 13000 1497.69 Tons 19470000 NA
## 2 5.34 18100 1503.04 Tons 27205000 NA
## 3 5.34 18100 1503.04 Tons 27205000 NA
## 4 5.08 12300 1447.32 Tons 17802000 NA
## 5 4.83 12700 1432.20 Tons 18189000 NA
## 6 4.14 9940 1529.98 Tons 15208000 NA
## Reproduce.Yield
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
# Keep only the 2020 rows, reduced to the year, county and production columns.
CA_wine2020 <- CA_wineproduction %>%
  filter(Year == 2020) %>%
  select(Year, County, Production)
# Fresno rows for 2010 through 2020 (inclusive), reduced to three columns.
# Comma-separated conditions in filter() are combined with a logical AND.
CA_winefres <- CA_wineproduction %>%
  filter(County == "Fresno", Year >= 2010, Year <= 2020) %>%
  select(Year, County, Production)
# Preview the first rows of the Fresno subset.
head(CA_winefres)
## Year County Production
## 1 2020 Fresno 690000
## 2 2019 Fresno 740000
## 3 2018 Fresno 769000
## 4 2017 Fresno 795000
## 5 2016 Fresno 755000
## 6 2015 Fresno 770000
# Scatter plot of Fresno production by year. Explicit axis labels replace the
# defaults, which would otherwise be the raw expressions `CA_winefres$Year`
# and `CA_winefres$Production`.
plot(
  CA_winefres$Year, CA_winefres$Production,
  main = "Wine Production in Fresno County between 2010 and 2020",
  xlab = "Year",
  ylab = "Production"
)
# San Joaquin rows for 2010 through 2020 (inclusive), reduced to three columns.
# Comma-separated conditions in filter() are combined with a logical AND.
CA_winesanjoa <- CA_wineproduction %>%
  filter(County == "SanJoaquin", Year >= 2010, Year <= 2020) %>%
  select(Year, County, Production)
# Preview the first rows of the San Joaquin subset.
head(CA_winesanjoa)
## Year County Production
## 1 2020 SanJoaquin 608000
## 2 2019 SanJoaquin 682000
## 3 2018 SanJoaquin 751000
## 4 2017 SanJoaquin 666000
## 5 2016 SanJoaquin 716000
## 6 2015 SanJoaquin 610000
# Scatter plot of San Joaquin production by year. Explicit axis labels replace
# the defaults, which would otherwise be the raw expressions
# `CA_winesanjoa$Year` and `CA_winesanjoa$Production`.
plot(
  CA_winesanjoa$Year, CA_winesanjoa$Production,
  main = "Wine Production in San Joaquin County between 2010 and 2020",
  xlab = "Year",
  ylab = "Production"
)
The top five wine-producing counties in 2020 were Fresno, San Joaquin, Madera, Sacramento, and Kern.
Across the years 1980 to 2020, the highest production was in the South Central Valley.
library(xlsx)
## Warning: package 'xlsx' was built under R version 4.0.5
library(writexl)
# NOTE(review): setwd() changes the working directory for everything that runs
# afterwards. The read below uses an absolute path and does not depend on it;
# confirm whether later code does before removing it.
setwd("C:/Users/12055/Documents/Career/Skill building")
# Load the "District" sheet of the production workbook.
CA_winedistrict <- read.xlsx(
  file = "C:/Users/12055/Documents/Career/Skill building/Data/Excel/Table 1 California_Wine_Production_1980_2020.xlsx",
  sheetName = "District"
)
# Total production per region and year. Rows with a missing Region are removed
# before grouping, which yields the same rows as removing the NA group after
# summarising. The result is still grouped by Region.
graph_CA_winedistrict <- CA_winedistrict %>%
  filter(!is.na(Region)) %>%
  group_by(Region, Year) %>%
  summarize(DistrictProduction = sum(Production))
## `summarise()` has grouped output by 'Region'. You can override using the
## `.groups` argument.
# Line chart of yearly production, one colored line per region.
ggplot(
  data = graph_CA_winedistrict,
  mapping = aes(
    x = Year,
    y = DistrictProduction,
    group = Region,
    color = Region
  )
) +
  geom_line()
Alt text: line chart of total production by year, with one colored line per region.
https://www.nass.usda.gov/Statistics_by_State/California/Publications/AgComm/index.php
Volpe, R., et al. (2010). Wine-grape production trends reflect evolving consumer demand over 30 years. California Agriculture, 64(1), 42–46. https://doi.org/10.3733/ca.v064n01p42