Import data:

#load packages
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
# Read the county-level production table from the local Kaggle CSV export.
wine_csv_path <- "C:/Users/12055/Documents/Career/Skill building/Data/Excel/Kaggle California_Wine_Production_1980_2020.csv"
CA_wineproduction <- read.csv(file = wine_csv_path)

# Preview the data: list the column names of the imported table.
colnames(CA_wineproduction)
##  [1] "Year"                "CommodityCode"       "CropName"           
##  [4] "CountyCode"          "County"              "HarvestedAcres"     
##  [7] "Yield.Unit.Acre."    "Production"          "Price.Dollars.Unit."
## [10] "Unit"                "Value.Dollars."      "X"                  
## [13] "Reproduce.Yield"
# Show the first six rows of the imported table.
head(CA_wineproduction, n = 6L)
##   Year CommodityCode   CropName CountyCode  County HarvestedAcres
## 1 2020        216299 GRAPESWINE          1 Alameda           2530
## 2 2019        216299 GRAPESWINE          1 Alameda           3390
## 3 2018        216299 GRAPESWINE          1 Alameda           3390
## 4 2017        216299 GRAPESWINE          1 Alameda           2420
## 5 2016        216299 GRAPESWINE          1 Alameda           2630
## 6 2015        216299 GRAPESWINE          1 Alameda           2400
##   Yield.Unit.Acre. Production Price.Dollars.Unit. Unit Value.Dollars.  X
## 1             5.14      13000             1497.69 Tons       19470000 NA
## 2             5.34      18100             1503.04 Tons       27205000 NA
## 3             5.34      18100             1503.04 Tons       27205000 NA
## 4             5.08      12300             1447.32 Tons       17802000 NA
## 5             4.83      12700             1432.20 Tons       18189000 NA
## 6             4.14       9940             1529.98 Tons       15208000 NA
##   Reproduce.Yield
## 1              NA
## 2              NA
## 3              NA
## 4              NA
## 5              NA
## 6              NA
# Keep only the 2020 records, reduced to the year, county and production columns.
CA_wine2020 <- CA_wineproduction %>%
  filter(Year == 2020) %>%
  select(Year, County, Production)
  
# Return the Year, County and Production columns of `data` for one county,
# restricted to the inclusive year range [first_year, last_year].
#
# data        : data frame with Year, County and Production columns.
# county_name : county label exactly as it appears in the County column.
# first_year  : first year to keep (inclusive).
# last_year   : last year to keep (inclusive).
county_production <- function(data, county_name,
                              first_year = 2010, last_year = 2020) {
  data %>%
    select(Year, County, Production) %>%
    filter(Year >= first_year & Year <= last_year & County == county_name)
}

# Scatter plot of production against year for one county subset.
# Axis labels are set explicitly; otherwise plot() labels the axes with the
# raw expressions passed to it (e.g. `data$Year`).
plot_county_production <- function(data, title) {
  plot(data$Year, data$Production,
       main = title, xlab = "Year", ylab = "Production")
}

CA_winefres <- county_production(CA_wineproduction, "Fresno")
head(CA_winefres)
##   Year County Production
## 1 2020 Fresno     690000
## 2 2019 Fresno     740000
## 3 2018 Fresno     769000
## 4 2017 Fresno     795000
## 5 2016 Fresno     755000
## 6 2015 Fresno     770000
plot_county_production(CA_winefres, "Wine Production in Fresno County between 2010 and 2020")

CA_winesanjoa <- county_production(CA_wineproduction, "SanJoaquin")
head(CA_winesanjoa)
##   Year     County Production
## 1 2020 SanJoaquin     608000
## 2 2019 SanJoaquin     682000
## 3 2018 SanJoaquin     751000
## 4 2017 SanJoaquin     666000
## 5 2016 SanJoaquin     716000
## 6 2015 SanJoaquin     610000
plot_county_production(CA_winesanjoa, "Wine Production in San Joaquin County between 2010 and 2020")

Key Insights:

Production

  • The top five wine-producing counties in 2020 were Fresno, San Joaquin, Madera, Sacramento, and Kern.

  • From 1980 to 2020, the highest production was in the South Central Valley.

library(xlsx)
## Warning: package 'xlsx' was built under R version 4.0.5
library(writexl)

# NOTE(review): the read below uses an absolute path, so nothing visible here
# depends on the working directory; setwd() is kept in case other chunks use
# relative paths -- confirm before removing.
setwd("C:/Users/12055/Documents/Career/Skill building")

# Read the "District" sheet of the production workbook.
district_xlsx_path <- "C:/Users/12055/Documents/Career/Skill building/Data/Excel/Table 1 California_Wine_Production_1980_2020.xlsx"
CA_winedistrict <- read.xlsx(district_xlsx_path, sheetName = "District")

# Total production per region and year, used for the line chart below.
# Rows without a Region are dropped before aggregating so no NA group is
# computed, and `.groups = "drop"` returns an ungrouped result (this also
# silences the "summarise() has grouped output" message).
graph_CA_winedistrict <- CA_winedistrict %>%
  filter(!is.na(Region)) %>%
  group_by(Region, Year) %>%
  summarize(DistrictProduction = sum(Production), .groups = "drop")
# Line chart of production by year, one coloured line per region.
ggplot(graph_CA_winedistrict) +
  geom_line(
    mapping = aes(x = Year, y = DistrictProduction, group = Region, colour = Region)
  )

Yield (Unit per Acre)

alt text here

Data Provenance

https://www.nass.usda.gov/Statistics_by_State/California/Publications/AgComm/index.php

Volpe. (2010). Wine-grape production trends reflect evolving consumer demand over 30 years. California Agriculture, 64, 42–46. https://doi.org/10.3733/ca.v064n01p42