library(tidyverse)
Registered S3 method overwritten by 'dplyr':
  method           from
  print.rowwise_df     
Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang
── Attaching packages ────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.1.1     ✔ purrr   0.3.2
✔ tibble  2.1.1     ✔ dplyr   0.8.1
✔ tidyr   0.8.3     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0
── Conflicts ───────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(janitor)

Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
# Load the TidyTuesday (2019-05-28) wine ratings CSV straight from GitHub.
wine_ratings <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-28/winemag-data-130k-v2.csv"
)
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
  X1 = col_double(),
  country = col_character(),
  description = col_character(),
  designation = col_character(),
  points = col_double(),
  price = col_double(),
  province = col_character(),
  region_1 = col_character(),
  region_2 = col_character(),
  taster_name = col_character(),
  taster_twitter_handle = col_character(),
  title = col_character(),
  variety = col_character(),
  winery = col_character()
)
# Standardise column names and keep the result. Calling clean_names() without
# assigning only prints a cleaned copy and leaves `wine_ratings` unchanged.
# The outer parentheses still print the table, as the bare call did.
(wine_ratings <- clean_names(wine_ratings))
NA

Look at the distribution of scores by taster; remove a wine if its taster name, price, or points is NA.

# Keep only reviews with a known taster, a known price, and a known score.
# Each intermediate table keeps its original name.
known_taster <- wine_ratings %>%
  filter(!is.na(taster_name))

known_price_taster <- known_taster %>%
  filter(!is.na(price))

# This step used to test `price` a second time, so rows with missing
# `points` were never dropped; it now tests `points`.
known_points_price_taster <- known_price_taster %>%
  filter(!is.na(points))

create rating/price ratio

# Points earned per unit of price for every review.
rp_ratio_df <- known_points_price_taster %>%
  mutate(rp_ratio = points / price)

# Number of reviews per taster.
rp_ratio_df %>%
  group_by(taster_name) %>%
  tally()

Group by taster and rp ratio

# Rating/price ratio per taster: horizontally jittered points with
# translucent boxplots on top, coloured and filled by taster.
ggplot(
  rp_ratio_df,
  aes(
    x = taster_name,
    y = rp_ratio,
    colour = taster_name,
    fill = taster_name
  )
) +
  geom_jitter(alpha = .5, height = 0, width = .25) +
  geom_boxplot(alpha = .25)

# Raw points per taster: jittered points with boxplots on top,
# coloured by taster.
ggplot(
  rp_ratio_df,
  aes(x = taster_name, y = points, colour = taster_name)
) +
  geom_jitter() +
  geom_boxplot()

ratio by country

# Rating/price ratio per country: jittered points with boxplots on top.
ggplot(rp_ratio_df, aes(x = country, y = rp_ratio)) +
  geom_jitter() +
  geom_boxplot()

# Number of reviews per country; the outer parentheses print the result.
(country_count <- rp_ratio_df %>%
  group_by(country) %>%
  tally())
NA

Merge to have count in the data and rename the column

# Attach each country's review count `n` to every review row.
# `country` is the only column the two tables share, so it is the join key.
country_count <- merge(x = country_count, y = rp_ratio_df, by = "country")

# Show the columns of the merged table.
country_count %>%
  colnames()
 [1] "country"               "n"                    
 [3] "X1"                    "description"          
 [5] "designation"           "points"               
 [7] "price"                 "province"             
 [9] "region_1"              "region_2"             
[11] "taster_name"           "taster_twitter_handle"
[13] "title"                 "variety"              
[15] "winery"                "rp_ratio"             

Rename `n` (example of the syntax: `ufos <- ufos %>% rename(spotter.comments = comments)`).

# Give the per-country review count a descriptive name.
country_count <- rename(country_count, total_count = n)

Add a producer-level category based on the review count: small (< 100), medium (100–999), large (1,000–9,999), massive (10,000–100,000).


# Bucket each row by its country's review count. Conditions are checked top
# to bottom; a count outside every range is labelled "other".
country_count <- country_count %>%
  mutate(
    producer_level = case_when(
      total_count %in% 0:99 ~ "small",
      total_count %in% 100:999 ~ "medium",
      total_count %in% 1000:9999 ~ "large",
      total_count %in% 10000:100000 ~ "massive",
      TRUE ~ "other"
    )
  )

Fix factor: `country_count %>% as.factor(country_count$producer_level, levels = c("small", "medium", "large", "massive"))`


# Turn producer_level into an ordered factor running small -> massive.
# Values not listed in `levels` (such as "other") become NA.
country_count[["producer_level"]] <- ordered(
  country_count[["producer_level"]],
  levels = c("small", "medium", "large", "massive")
)

# Confirm the column is now an ordered factor.
class(country_count[["producer_level"]])
[1] "ordered" "factor" 
# Number of review rows in each producer level.
country_count %>%
  group_by(producer_level) %>%
  tally()

tally by production level

# Store the per-level row counts for later use.
by_producer_level <- country_count %>%
  group_by(producer_level) %>%
  tally()


# Distribution of review points within each producer level.
ggplot(country_count, aes(x = producer_level, y = points)) +
  geom_boxplot() +
  labs(
    title = "Meh: Surprisingly similar scores",
    subtitle = "Variability of Wine Score by Country Representation",
    x = "Countries Sorted by Review Representation Category",
    y = "Points"
  )

small countries points

# Restrict to rows in the "small" producer level.
small_countries <- country_count %>%
  filter(producer_level == "small")

# Rating/price ratio per country among the small producers.
ggplot(small_countries, aes(x = country, y = rp_ratio)) +
  geom_boxplot()

LS0tCnRpdGxlOiAiMjAxOTA1MjggTm90ZWJvb2siCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGphbml0b3IpCgp3aW5lX3JhdGluZ3MgPC0gcmVhZHI6OnJlYWRfY3N2KCJodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vcmZvcmRhdGFzY2llbmNlL3RpZHl0dWVzZGF5L21hc3Rlci9kYXRhLzIwMTkvMjAxOS0wNS0yOC93aW5lbWFnLWRhdGEtMTMway12Mi5jc3YiKQoKYGBgCgpgYGB7cn0KY2xlYW5fbmFtZXMod2luZV9yYXRpbmdzKQoKYGBgCgpMb29rIGF0IGRpc3RyaWJ1dGlvbiBvZiBzY29yZXMgYmFzZWQgb24gZXZhbHVhdG9yLCAKcmVtb3ZlIHdpbmUgaWYgdGFzdGVyIG5hbWUsIHByaWNlLCBwb2ludHMgTkEKYGBge3J9Cmtub3duX3Rhc3RlciA8LSB3aW5lX3JhdGluZ3MgJT4lIGZpbHRlcighaXMubmEodGFzdGVyX25hbWUpKSAgIAoKa25vd25fcHJpY2VfdGFzdGVyIDwtIGtub3duX3Rhc3RlciAlPiUgZmlsdGVyKCFpcy5uYShwcmljZSkpCgprbm93bl9wb2ludHNfcHJpY2VfdGFzdGVyIDwtIGtub3duX3ByaWNlX3Rhc3RlciAlPiUgZmlsdGVyKCFpcy5uYShwcmljZSkpCmBgYAoKY3JlYXRlIHJhdGluZy9wcmljZSByYXRpbwpgYGB7cn0KcnBfcmF0aW9fZGYgPC0gbXV0YXRlKGtub3duX3BvaW50c19wcmljZV90YXN0ZXIsIHJwX3JhdGlvID0gcG9pbnRzL3ByaWNlKQpgYGAKCmBgYHtyfQp0YWxseShncm91cF9ieShycF9yYXRpb19kZiwgdGFzdGVyX25hbWUpKQpgYGAKCkdyb3VwIGJ5IHRhc3RlciBhbmQgcnAgcmF0aW8KYGBge3J9CmdncGxvdChycF9yYXRpb19kZikgKwogIGFlcyh4ID0gdGFzdGVyX25hbWUpICsgCiAgYWVzKHkgPSBycF9yYXRpbykgKwogIGdlb21faml0dGVyKGFscGhhID0gLjUsIGhlaWdodCA9IDAsIHdpZHRoID0gLjI1KSArCiAgYWVzKGNvbCA9IHRhc3Rlcl9uYW1lKSArIAogIGdlb21fYm94cGxvdChhbHBoYSA9IC4yNSkgKwogIGFlcyhmaWxsID0gdGFzdGVyX25hbWUpIApgYGAKYGBge3J9CmdncGxvdChycF9yYXRpb19kZikgKwogIGFlcyh4ID0gdGFzdGVyX25hbWUpICsgCiAgYWVzKHkgPSBwb2ludHMpICsKICBnZW9tX2ppdHRlcigpICsKICBhZXMoY29sID0gdGFzdGVyX25hbWUpICsgCiAgZ2VvbV9ib3hwbG90KCkgCmBgYAoKcmF0aW8gYnkgY291bnRyeQpgYGB7cn0KZ2dwbG90KHJwX3JhdGlvX2RmKSArCiAgYWVzKHggPSBjb3VudHJ5KSArIAogIGFlcyh5ID0gcnBfcmF0aW8pICsKICBnZW9tX2ppdHRlcigpICsKICBnZW9tX2JveHBsb3QoKSAKYGBgCgoKYGBge3J9Cihjb3VudHJ5X2NvdW50IDwtIHRhbGx5KGdyb3VwX2J5KHJwX3JhdGlvX2RmLCBjb3VudHJ5KSkpCgpgYGAKTWVyZ2UgdG8gaGF2ZSBjb3VudCBpbiB0aGUgZGF0YSBhbmQgcmVuYW1lIHRoZSBjb2x1bW4KCmBgYHtyfQpjb3VudHJ5X2NvdW50IDwtIG1lcmdlKGNvdW50cnlfY291bnQscnBfcmF0aW9fZGYpCgpgYGAKCmBgYHtyfQpjb2xuYW1lcyhj
b3VudHJ5X2NvdW50KQpgYGAKCnJlbmFtZSBuCnVmb3MgPC0gdWZvcyAlPiUgcmVuYW1lKHNwb3R0ZXIuY29tbWVudHMgPSBjb21tZW50cykgCmBgYHtyfQpjb3VudHJ5X2NvdW50IDwtIGNvdW50cnlfY291bnQgJT4lIHJlbmFtZSh0b3RhbF9jb3VudCA9IG4pCmBgYAoKYWRkIGZhY3RvciBsZXZlbCBvZiBzbWFsbCA8MTAwLCBtaWQgMTAwLTk5OSwgbGFyZ2UgPjEwMDAKICAgICAgICAgICAgICAgICAgICAgICAgIApgYGB7cn0KCmNvdW50cnlfY291bnQgPC0gbXV0YXRlKGNvdW50cnlfY291bnQsIHByb2R1Y2VyX2xldmVsID0gaWZlbHNlKHRvdGFsX2NvdW50ICVpbiUgMDo5OSwgInNtYWxsIiwKICBpZmVsc2UodG90YWxfY291bnQlaW4lIDEwMDo5OTksICJtZWRpdW0iLCAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGlmZWxzZSh0b3RhbF9jb3VudCAlaW4lIDEwMDA6OTk5OSwgImxhcmdlIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBpZmVsc2UodG90YWxfY291bnQgJWluJSAxMDAwMDoxMDAwMDAsICJtYXNzaXZlIiwgIm90aGVyIgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICApKSkpKQoKYGBgCgpGaXggZmFjdG9yCmNvdW50cnlfY291bnQgJT4lIGFzLmZhY3Rvcihjb3VudHJ5X2NvdW50JHByb2R1Y2VyX2xldmVsLCBsZXZlbHM9Yygic21hbGwiLCAibWVkaXVtIiwgImxhcmdlIiwgIm1hc3NpdmUiKSkKYGBge3J9Cgpjb3VudHJ5X2NvdW50JHByb2R1Y2VyX2xldmVsIDwtIGZhY3Rvcihjb3VudHJ5X2NvdW50JHByb2R1Y2VyX2xldmVsLCBsZXZlbHM9Yygic21hbGwiLCAibWVkaXVtIiwgImxhcmdlIiwgIm1hc3NpdmUiKSwgb3JkZXJlZD1UUlVFKQoKY2xhc3MoY291bnRyeV9jb3VudCRwcm9kdWNlcl9sZXZlbCkKCnRhbGx5KGdyb3VwX2J5KGNvdW50cnlfY291bnQsIHByb2R1Y2VyX2xldmVsKSkKYGBgCgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKdGFsbHkgYnkgcHJvZHVjdGlvbiBsZXZlbApgYGB7cn0KYnlfcHJvZHVjZXJfbGV2ZWwgPC0gdGFsbHkoZ3JvdXBfYnkoY291bnRyeV9jb3VudCwgcHJvZHVjZXJfbGV2ZWwpKQpgYGAKCgoKYGBge3IgcGxvdF9pX3dhbnR9CgoKZ2dwbG90KGNvdW50cnlfY291bnQpICsKICBhZXMoeCA9IHByb2R1Y2VyX2xldmVsKSArIAogIGFlcyh5ID0gcG9pbnRzKSArCiAgZ2VvbV9ib3hwbG90KCkrCiAgbGFicyh0aXRsZT0gIk1laDogU3VycHJpc2luZ2x5IHNpbWlsYXIgc2NvcmVzIiwgc3VidGl0bGUgPSJWYXJpYWJpbGl0eSBvZiBXaW5lIFNjb3JlIGJ5IENvdW50cnkgUmVwcmVzZW50YXRpb24iLCB4ID0gIkNvdW50cmllcyBTb3J0ZWQgYnkgUmV2aWV3IFJlcHJlc2VudGF0aW9uIENhdGVnb3J5IiwgeSA9ICJQb2ludHMiKSAKYGBgCgpzbWFsbCBjb3VudHJpZXMgcG9pbnRzCmBgYHty
fQpzbWFsbF9jb3VudHJpZXMgPC0gZmlsdGVyKGNvdW50cnlfY291bnQsIHByb2R1Y2VyX2xldmVsID09ICJzbWFsbCIpCgpnZ3Bsb3Qoc21hbGxfY291bnRyaWVzKSArCiAgYWVzKHg9Y291bnRyeSkgKwogIGFlcyh5PXJwX3JhdGlvKSArCiAgZ2VvbV9ib3hwbG90KCkgCgpgYGAKCg==