library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidyr)
library(dplyr)
library(ggplot2)
library(readr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(stringr)
wine_original <- as_tibble(read_csv('winemag-data.csv'))
## Rows: 129971 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): country, description, designation, province, region_1, region_2, t...
## dbl (3): id, points, price
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
wine <- as_tibble(read_csv('winemag-data.csv'))
## Rows: 129971 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): country, description, designation, province, region_1, region_2, t...
## dbl (3): id, points, price
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(wine)
## tibble [129,971 × 14] (S3: tbl_df/tbl/data.frame)
## $ id : num [1:129971] 0 1 2 3 4 5 6 7 8 9 ...
## $ country : chr [1:129971] "Italy" "Portugal" "US" "US" ...
## $ description : chr [1:129971] "Aromas include tropical fruit, broom, brimstone and dried herb. The palate isn't overly expressive, offering un"| __truncated__ "This is ripe and fruity, a wine that is smooth while still structured. Firm tannins are filled out with juicy r"| __truncated__ "Tart and snappy, the flavors of lime flesh and rind dominate. Some green pineapple pokes through, with crisp ac"| __truncated__ "Pineapple rind, lemon pith and orange blossom start off the aromas. The palate is a bit more opulent, with note"| __truncated__ ...
## $ designation : chr [1:129971] "Vulkà Bianco" "Avidagos" NA "Reserve Late Harvest" ...
## $ points : num [1:129971] 87 87 87 87 87 87 87 87 87 87 ...
## $ price : num [1:129971] NA 15 14 13 65 15 16 24 12 27 ...
## $ province : chr [1:129971] "Sicily & Sardinia" "Douro" "Oregon" "Michigan" ...
## $ region_1 : chr [1:129971] "Etna" NA "Willamette Valley" "Lake Michigan Shore" ...
## $ region_2 : chr [1:129971] NA NA "Willamette Valley" NA ...
## $ taster_name : chr [1:129971] "Kerin O’Keefe" "Roger Voss" "Paul Gregutt" "Alexander Peartree" ...
## $ taster_twitter_handle: chr [1:129971] "@kerinokeefe" "@vossroger" "@paulgwine " NA ...
## $ title : chr [1:129971] "Nicosia 2013 Vulkà Bianco (Etna)" "Quinta dos Avidagos 2011 Avidagos Red (Douro)" "Rainstorm 2013 Pinot Gris (Willamette Valley)" "St. Julian 2013 Reserve Late Harvest Riesling (Lake Michigan Shore)" ...
## $ variety : chr [1:129971] "White Blend" "Portuguese Red" "Pinot Gris" "Riesling" ...
## $ winery : chr [1:129971] "Nicosia" "Quinta dos Avidagos" "Rainstorm" "St. Julian" ...
head(wine, n=3)
## # A tibble: 3 × 14
## id country description designation points price province region_1 region_2
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 0 Italy Aromas inc… Vulkà Bian… 87 NA Sicily … Etna <NA>
## 2 1 Portugal This is ri… Avidagos 87 15 Douro <NA> <NA>
## 3 2 US Tart and s… <NA> 87 14 Oregon Willame… Willame…
## # … with 5 more variables: taster_name <chr>, taster_twitter_handle <chr>,
## # title <chr>, variety <chr>, winery <chr>
tail(wine, n=3)
## # A tibble: 3 × 14
## id country description designation points price province region_1 region_2
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 129968 France Well-drain… Kritt 90 30 Alsace Alsace <NA>
## 2 129969 France A dry styl… <NA> 90 32 Alsace Alsace <NA>
## 3 129970 France Big, rich … Lieu-dit H… 90 21 Alsace Alsace <NA>
## # … with 5 more variables: taster_name <chr>, taster_twitter_handle <chr>,
## # title <chr>, variety <chr>, winery <chr>
wine %>%
filter(price != "NA") %>%
str()
## tibble [120,975 × 14] (S3: tbl_df/tbl/data.frame)
## $ id : num [1:120975] 1 2 3 4 5 6 7 8 9 10 ...
## $ country : chr [1:120975] "Portugal" "US" "US" "US" ...
## $ description : chr [1:120975] "This is ripe and fruity, a wine that is smooth while still structured. Firm tannins are filled out with juicy r"| __truncated__ "Tart and snappy, the flavors of lime flesh and rind dominate. Some green pineapple pokes through, with crisp ac"| __truncated__ "Pineapple rind, lemon pith and orange blossom start off the aromas. The palate is a bit more opulent, with note"| __truncated__ "Much like the regular bottling from 2012, this comes across as rather rough and tannic, with rustic, earthy, he"| __truncated__ ...
## $ designation : chr [1:120975] "Avidagos" NA "Reserve Late Harvest" "Vintner's Reserve Wild Child Block" ...
## $ points : num [1:120975] 87 87 87 87 87 87 87 87 87 87 ...
## $ price : num [1:120975] 15 14 13 65 15 16 24 12 27 19 ...
## $ province : chr [1:120975] "Douro" "Oregon" "Michigan" "Oregon" ...
## $ region_1 : chr [1:120975] NA "Willamette Valley" "Lake Michigan Shore" "Willamette Valley" ...
## $ region_2 : chr [1:120975] NA "Willamette Valley" NA "Willamette Valley" ...
## $ taster_name : chr [1:120975] "Roger Voss" "Paul Gregutt" "Alexander Peartree" "Paul Gregutt" ...
## $ taster_twitter_handle: chr [1:120975] "@vossroger" "@paulgwine " NA "@paulgwine " ...
## $ title : chr [1:120975] "Quinta dos Avidagos 2011 Avidagos Red (Douro)" "Rainstorm 2013 Pinot Gris (Willamette Valley)" "St. Julian 2013 Reserve Late Harvest Riesling (Lake Michigan Shore)" "Sweet Cheeks 2012 Vintner's Reserve Wild Child Block Pinot Noir (Willamette Valley)" ...
## $ variety : chr [1:120975] "Portuguese Red" "Pinot Gris" "Riesling" "Pinot Noir" ...
## $ winery : chr [1:120975] "Quinta dos Avidagos" "Rainstorm" "St. Julian" "Sweet Cheeks" ...
wine %>%
filter(price != "NA")%>%
summarize(mean(price), median(price), min(price), max(price))
## # A tibble: 1 × 4
## `mean(price)` `median(price)` `min(price)` `max(price)`
## <dbl> <dbl> <dbl> <dbl>
## 1 35.4 25 4 3300
ggplot(wine, aes(price)) +
geom_boxplot()+
scale_x_log10()
## Warning: Removed 8996 rows containing non-finite values (stat_boxplot).
ggplot(wine, aes(price))+
geom_histogram(bins = 100)+
scale_x_log10()
## Warning: Removed 8996 rows containing non-finite values (stat_bin).
wine %>%
filter(price < 75)%>%
ggplot(aes(price))+
geom_histogram(bins = 80)
unique(wine$country)
## [1] "Italy" "Portugal" "US"
## [4] "Spain" "France" "Germany"
## [7] "Argentina" "Chile" "Australia"
## [10] "Austria" "South Africa" "New Zealand"
## [13] "Israel" "Hungary" "Greece"
## [16] "Romania" "Mexico" "Canada"
## [19] NA "Turkey" "Czech Republic"
## [22] "Slovenia" "Luxembourg" "Croatia"
## [25] "Georgia" "Uruguay" "England"
## [28] "Lebanon" "Serbia" "Brazil"
## [31] "Moldova" "Morocco" "Peru"
## [34] "India" "Bulgaria" "Cyprus"
## [37] "Armenia" "Switzerland" "Bosnia and Herzegovina"
## [40] "Ukraine" "Slovakia" "Macedonia"
## [43] "China" "Egypt"
wine %>%
count(country, sort = TRUE)
## # A tibble: 44 × 2
## country n
## <chr> <int>
## 1 US 54504
## 2 France 22093
## 3 Italy 19540
## 4 Spain 6645
## 5 Portugal 5691
## 6 Chile 4472
## 7 Argentina 3800
## 8 Austria 3345
## 9 Australia 2329
## 10 Germany 2165
## # … with 34 more rows
wine_top5 <- wine %>%
count(country) %>%
filter(n >= 5691) %>%
arrange(desc(n))
as.tibble(wine_top5)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## # A tibble: 5 × 2
## country n
## <chr> <int>
## 1 US 54504
## 2 France 22093
## 3 Italy 19540
## 4 Spain 6645
## 5 Portugal 5691
count(wine %>% filter(province == "Oregon"))
## # A tibble: 1 × 1
## n
## <int>
## 1 5373
wine %>%
filter(province == "Oregon")%>%
group_by(variety)%>%
count(variety, sort = TRUE)
## # A tibble: 83 × 2
## # Groups: variety [83]
## variety n
## <chr> <int>
## 1 Pinot Noir 2786
## 2 Chardonnay 498
## 3 Pinot Gris 474
## 4 Riesling 247
## 5 Syrah 172
## 6 Pinot Blanc 111
## 7 Red Blend 109
## 8 Tempranillo 99
## 9 Viognier 90
## 10 Rosé 89
## # … with 73 more rows
wine %>%
filter(province == "Oregon", variety == c("Pinot Noir", "Chardonnay"), price != "NA")%>%
group_by(variety) %>%
summarize(mean(price), median(price))
## Warning in variety == c("Pinot Noir", "Chardonnay"): longer object length is not
## a multiple of shorter object length
## # A tibble: 2 × 3
## variety `mean(price)` `median(price)`
## <chr> <dbl> <dbl>
## 1 Chardonnay 35.4 30
## 2 Pinot Noir 43.9 42
wine %>%
filter(province == "Oregon", variety == c("Pinot Noir", "Chardonnay"), price != "NA")%>%
group_by(variety) %>%
summarize(sd(price))
## Warning in variety == c("Pinot Noir", "Chardonnay"): longer object length is not
## a multiple of shorter object length
## # A tibble: 2 × 2
## variety `sd(price)`
## <chr> <dbl>
## 1 Chardonnay 19.5
## 2 Pinot Noir 20.4
oregon_pn <- wine %>%
filter(province == "Oregon", variety == "Pinot Noir", price != "NA")
oregon_ch <- wine %>%
filter(province == "Oregon", variety == "Chardonnay", price != "NA")
oregon_pn_ch <- rbind(oregon_pn, oregon_ch)
ggplot(oregon_pn_ch, aes(price, fill = "variety"))+
geom_histogram(alpha = 0.5, position = "identity")+
scale_x_log10()+
facet_grid(~variety)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(oregon_pn_ch, aes(price))+
geom_boxplot()+
scale_x_log10()+
coord_flip()+
facet_grid(~variety)
wine_original %>%
count("NA")
## # A tibble: 1 × 2
## `"NA"` n
## <chr> <int>
## 1 NA 129971
wine_original %>%
filter(points == "NA") %>%
count()
## # A tibble: 1 × 1
## n
## <int>
## 1 0
head(wine_original)
## # A tibble: 6 × 14
## id country description designation points price province region_1 region_2
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 0 Italy Aromas inc… Vulkà Bian… 87 NA Sicily … Etna <NA>
## 2 1 Portugal This is ri… Avidagos 87 15 Douro <NA> <NA>
## 3 2 US Tart and s… <NA> 87 14 Oregon Willame… Willame…
## 4 3 US Pineapple … Reserve La… 87 13 Michigan Lake Mi… <NA>
## 5 4 US Much like … Vintner's … 87 65 Oregon Willame… Willame…
## 6 5 Spain Blackberry… Ars In Vit… 87 15 Norther… Navarra <NA>
## # … with 5 more variables: taster_name <chr>, taster_twitter_handle <chr>,
## # title <chr>, variety <chr>, winery <chr>
w_years <- c("1700":"2020")
w_years_match <- str_c(w_years, collapse = "|")
year_vector <- str_extract(wine_original$title, w_years_match)
wine_year <- wine_original %>%
mutate(year = as.numeric(year_vector))%>%
relocate("year","country","variety","price")
str(wine_year)
## tibble [129,971 × 15] (S3: tbl_df/tbl/data.frame)
## $ year : num [1:129971] 2013 2011 2013 2013 2012 ...
## $ country : chr [1:129971] "Italy" "Portugal" "US" "US" ...
## $ variety : chr [1:129971] "White Blend" "Portuguese Red" "Pinot Gris" "Riesling" ...
## $ price : num [1:129971] NA 15 14 13 65 15 16 24 12 27 ...
## $ id : num [1:129971] 0 1 2 3 4 5 6 7 8 9 ...
## $ description : chr [1:129971] "Aromas include tropical fruit, broom, brimstone and dried herb. The palate isn't overly expressive, offering un"| __truncated__ "This is ripe and fruity, a wine that is smooth while still structured. Firm tannins are filled out with juicy r"| __truncated__ "Tart and snappy, the flavors of lime flesh and rind dominate. Some green pineapple pokes through, with crisp ac"| __truncated__ "Pineapple rind, lemon pith and orange blossom start off the aromas. The palate is a bit more opulent, with note"| __truncated__ ...
## $ designation : chr [1:129971] "Vulkà Bianco" "Avidagos" NA "Reserve Late Harvest" ...
## $ points : num [1:129971] 87 87 87 87 87 87 87 87 87 87 ...
## $ province : chr [1:129971] "Sicily & Sardinia" "Douro" "Oregon" "Michigan" ...
## $ region_1 : chr [1:129971] "Etna" NA "Willamette Valley" "Lake Michigan Shore" ...
## $ region_2 : chr [1:129971] NA NA "Willamette Valley" NA ...
## $ taster_name : chr [1:129971] "Kerin O’Keefe" "Roger Voss" "Paul Gregutt" "Alexander Peartree" ...
## $ taster_twitter_handle: chr [1:129971] "@kerinokeefe" "@vossroger" "@paulgwine " NA ...
## $ title : chr [1:129971] "Nicosia 2013 Vulkà Bianco (Etna)" "Quinta dos Avidagos 2011 Avidagos Red (Douro)" "Rainstorm 2013 Pinot Gris (Willamette Valley)" "St. Julian 2013 Reserve Late Harvest Riesling (Lake Michigan Shore)" ...
## $ winery : chr [1:129971] "Nicosia" "Quinta dos Avidagos" "Rainstorm" "St. Julian" ...
wine_2000_2015 <- wine_year %>%
filter(year %in% c("2000":"2015"))
wine_2000_2015 %>%
group_by(country) %>%
summarize(max(year), min(year))
## # A tibble: 44 × 3
## country `max(year)` `min(year)`
## <chr> <dbl> <dbl>
## 1 Argentina 2015 2000
## 2 Armenia 2015 2015
## 3 Australia 2015 2000
## 4 Austria 2015 2000
## 5 Bosnia and Herzegovina 2011 2007
## 6 Brazil 2015 2002
## 7 Bulgaria 2015 2006
## 8 Canada 2015 2002
## 9 Chile 2015 2000
## 10 China 2009 2009
## # … with 34 more rows
wine_year %>%
filter(country %in% c("Spain", "Italy"),
price != "NA")%>%
group_by(year, country)%>%
summarize(sd(price))
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
## # A tibble: 71 × 3
## # Groups: year [46]
## year country `sd(price)`
## <dbl> <chr> <dbl>
## 1 1821 Italy 1
## 2 1827 Spain 4.24
## 3 1845 Italy 12.7
## 4 1847 Spain NA
## 5 1860 Italy 4.14
## 6 1868 Italy 13.4
## 7 1870 Italy 0
## 8 1872 Spain NA
## 9 1875 Italy 5.02
## 10 1877 Italy 10.8
## # … with 61 more rows
wine_year_sp_it <- wine_year %>%
filter(country %in% c("Spain", "Italy"), price != "NA")%>%
group_by(year, country)%>%
mutate(sd_price = sd(price))%>% relocate("sd_price")
head(wine_year_sp_it)
## # A tibble: 6 × 16
## # Groups: year, country [5]
## sd_price year country variety price id description designation points
## <dbl> <dbl> <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl>
## 1 32.8 2011 Spain Tempran… 15 5 Blackberry a… Ars In Vitro 87
## 2 37.9 2013 Italy Frappato 16 6 Here's a bri… Belsito 87
## 3 38.6 2010 Spain Tempran… 28 18 Desiccated b… Vendimia Sel… 87
## 4 45.1 2007 Italy White B… 19 22 Delicate aro… Ficiligno 87
## 5 34.0 2009 Italy Nero d'… 35 24 Aromas of pr… Aynat 87
## 6 37.9 2013 Italy White B… 13 26 Pretty aroma… Dalila 87
## # … with 7 more variables: province <chr>, region_1 <chr>, region_2 <chr>,
## # taster_name <chr>, taster_twitter_handle <chr>, title <chr>, winery <chr>
ggplot(wine_year_sp_it, aes(year, sd_price, fill=country))+
geom_col(position = "dodge", alpha = 0.3)+
theme(panel.grid = element_blank(),
panel.background = element_blank())+
scale_y_continuous("Standard Deviation on Price")+
scale_x_continuous("Year",
limits = c(1990,2020),
breaks = c(1990, 2000, 2010))
## Warning: Removed 1398 rows containing missing values (geom_col).
ggplot(wine_year_sp_it, aes(year, price, fill = country))+
geom_col(position = "dodge", alpha = 0.7)+
theme(panel.grid = element_blank(),
panel.background = element_blank())+
scale_y_continuous("Price")+
scale_x_continuous("Year",
limits = c(1990,2020),
breaks = c(1990, 2000, 2010))
## Warning: Removed 1396 rows containing missing values (geom_col).
wine_year_sp_it %>%
filter(sd_price != "NA")%>%
group_by(country)%>%
summarize(mean(sd_price), median(sd_price))
## # A tibble: 2 × 3
## country `mean(sd_price)` `median(sd_price)`
## <chr> <dbl> <dbl>
## 1 Italy 34.8 37.9
## 2 Spain 32.2 32.8
wine_year_or <- wine_year %>%
filter(province == "Oregon",
variety %in% c("Pinot Noir", "Chardonnay"),
year %in% c("2000":"2005"))%>%
group_by(year, variety)%>%
summarize(mean(price))
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
wine_year_or
## # A tibble: 9 × 3
## # Groups: year [6]
## year variety `mean(price)`
## <dbl> <chr> <dbl>
## 1 2000 Chardonnay 20.5
## 2 2000 Pinot Noir 42
## 3 2001 Chardonnay 14
## 4 2001 Pinot Noir 24
## 5 2002 Pinot Noir 44.5
## 6 2003 Pinot Noir 29
## 7 2004 Pinot Noir 62.8
## 8 2005 Chardonnay 23.2
## 9 2005 Pinot Noir 40.9
wine_year_mean_or <- wine_year %>%
filter(province == "Oregon",
variety %in% c("Pinot Noir", "Chardonnay"),
year %in% c("2000":"2005"))%>%
group_by(year, variety)%>%
mutate(mean_price = mean(price))%>%
relocate("year", "variety", "mean_price")%>%
arrange(year)
wine_year_mean_or
## # A tibble: 113 × 16
## # Groups: year, variety [9]
## year variety mean_price country price id description designation points
## <dbl> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <dbl>
## 1 2000 Pinot N… 42 US 85 6278 This except… Marguerite 94
## 2 2000 Chardon… 20.5 US 12 24342 Partial bar… <NA> 86
## 3 2000 Pinot N… 42 US 24 24347 Very forwar… Estate 86
## 4 2000 Chardon… 20.5 US 11 24354 All barrel … <NA> 86
## 5 2000 Chardon… 20.5 US 28 24648 Lean and cr… <NA> 88
## 6 2000 Chardon… 20.5 US 18 24666 Their estat… Estate 88
## 7 2000 Chardon… 20.5 US 10 24670 About 40% i… Blue Moon 88
## 8 2000 Chardon… 20.5 US 35 24671 Dijon clone… Clos du So… 88
## 9 2000 Chardon… 20.5 US 20 29581 This is a w… Winery Est… 81
## 10 2000 Chardon… 20.5 US 30 38536 In ‘99 Adel… Stoller Vi… 92
## # … with 103 more rows, and 7 more variables: province <chr>, region_1 <chr>,
## # region_2 <chr>, taster_name <chr>, taster_twitter_handle <chr>,
## # title <chr>, winery <chr>
ggplot(wine_year_mean_or, aes(year, mean_price, color=variety))+
geom_line()+
theme_classic()+
scale_y_continuous("Mean Price")+
scale_x_continuous("Year")
wine_ch_pn_under80 <- wine_original %>%
filter(variety %in% c("Chardonnay", "Pinot Noir"),
price < 80)%>%
relocate("price", "points", "variety")%>%
group_by(variety)
wine_ch_pn_under80
## # A tibble: 22,428 × 14
## # Groups: variety [2]
## price points variety id country description designation province region_1
## <dbl> <dbl> <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 65 87 Pinot N… 4 US Much like … Vintner's … Oregon Willame…
## 2 12 87 Chardon… 14 US Building o… <NA> Califor… Central…
## 3 20 87 Pinot N… 21 US A sleek mi… <NA> Oregon Oregon
## 4 69 87 Pinot N… 25 US Oak and ea… King Ridge… Califor… Sonoma …
## 5 50 86 Pinot N… 35 US As with ma… Hyland Oregon McMinnv…
## 6 22 86 Pinot N… 41 US A stiff, t… <NA> Oregon Willame…
## 7 30 85 Chardon… 55 US This shows… Estate Bot… Califor… Napa Va…
## 8 14 85 Chardon… 56 US This is we… <NA> Califor… North C…
## 9 13 85 Pinot N… 58 Chile Lightly he… Reserve Maipo V… <NA>
## 10 24 86 Chardon… 65 France From the w… <NA> Burgundy Chablis
## # … with 22,418 more rows, and 5 more variables: region_2 <chr>,
## # taster_name <chr>, taster_twitter_handle <chr>, title <chr>, winery <chr>
ggplot(wine_ch_pn_under80, aes(points, price, color = variety))+
geom_jitter(alpha = 0.3)+
geom_smooth()+
theme_classic()+
labs(title = "How Points Effect Price",
subtitle = "Chardonnay and Pinot Noir under $80")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
oregon_wine_2010_15 <- wine_year %>%
filter(province == "Oregon",
year %in% c("2010":"2015"),
region_1 != "NA",
region_2 != "NA")%>%
relocate("year", "region_1", "region_2", "points")%>%
group_by(year, region_1, region_2)%>%
summarize(mean(points))
## `summarise()` has grouped output by 'year', 'region_1'. You can override using the `.groups` argument.
oregon_wine_2010_15
## # A tibble: 88 × 4
## # Groups: year, region_1 [88]
## year region_1 region_2 `mean(points)`
## <dbl> <chr> <chr> <dbl>
## 1 2010 Applegate Valley Southern Oregon 87.7
## 2 2010 Chehalem Mountains Willamette Valley 89.5
## 3 2010 Columbia Gorge (OR) Oregon Other 87.7
## 4 2010 Columbia Valley (OR) Oregon Other 87.6
## 5 2010 Dundee Hills Willamette Valley 89.7
## 6 2010 Eola-Amity Hills Willamette Valley 90
## 7 2010 McMinnville Willamette Valley 87.5
## 8 2010 Oregon Oregon Other 87
## 9 2010 Ribbon Ridge Willamette Valley 91.1
## 10 2010 Rogue Valley Southern Oregon 85.9
## # … with 78 more rows
oregon_wine_1015_meanpts <- wine_year %>%
filter(province == "Oregon",
year %in% c("2010":"2015"),
region_1 != "NA",
region_2 != "NA")%>%
group_by(year, region_1, region_2)%>%
mutate(mean_pts = mean(points))%>%
unite('Merged', region_1:region_2, remove = FALSE)%>%
relocate("year", "Merged", "mean_pts", "points")
oregon_wine_1015_meanpts
## # A tibble: 3,682 × 17
## # Groups: year, region_1, region_2 [88]
## year Merged mean_pts points country variety price id description
## <dbl> <chr> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <chr>
## 1 2013 Willamet… 89.5 87 US Pinot … 14 2 Tart and snappy,…
## 2 2012 Willamet… 89.3 87 US Pinot … 65 4 Much like the re…
## 3 2013 Oregon_O… 88.2 87 US Pinot … 20 21 A sleek mix of t…
## 4 2010 McMinnvi… 87.5 86 US Pinot … 50 35 As with many of …
## 5 2015 Eola-Ami… 90.6 86 US Pinot … 25 78 Some rosés are m…
## 6 2014 Willamet… 89.5 91 US Pinot … 38 173 This wine is bur…
## 7 2011 Dundee H… 90.3 92 US Pinot … 52 283 Bella Vida repla…
## 8 2014 Dundee H… 91.3 95 US Pinot … 48 364 For 2014, the We…
## 9 2011 Ribbon R… 90.2 92 US Pinot … 50 460 Proving that exc…
## 10 2012 Dundee H… 91.4 92 US Pinot … 29 461 A marvelous Pino…
## # … with 3,672 more rows, and 8 more variables: designation <chr>,
## # province <chr>, region_1 <chr>, region_2 <chr>, taster_name <chr>,
## # taster_twitter_handle <chr>, title <chr>, winery <chr>
ggplot(oregon_wine_1015_meanpts, aes(year, mean_pts, color=Merged))+
geom_point()+
theme_classic()+
ylab("Average Points")+
xlab("Year")+
labs(title = "How Average Points Have Changed Over the Years",
subtitle = "Oregon Wine Regions")+
theme(legend.title = element_blank())