Load the R packages tidyverse, xray, skimr, JavaGD and xlsx.

# Attach the packages used by this analysis. Each library() call sits on its
# own line: R needs a newline (or `;`) between statements, so several calls
# on one line do not parse.
library(tidyverse)
library(xray)
library(skimr)
library(JavaGD)
library(xlsx)

Load coffee ratings dataset

# Read the coffee ratings CSV into `coffee_ratings` with read_csv().
# The path is delimited with plain ASCII double quotes; typographic quotes
# are not string delimiters in R and stop the script from parsing.
coffee_ratings <- read_csv("/Users/Ballance/Downloads/coffee_ratings.csv")

Review dataset

# Review the raw data before any cleaning. The two calls are separate
# statements and therefore need separate lines.
xray::anomalies(coffee_ratings)  # anomaly report from the xray package
summary(coffee_ratings)          # base R summary of every column

Extract the columns of the data frame into vectors for modification

# Pull the columns of interest out of `coffee_ratings` into stand-alone
# vectors, one assignment per line, using `$` for column extraction.
total_cup_points <- coffee_ratings$total_cup_points
mill <- coffee_ratings$mill
owner <- coffee_ratings$owner
altitude_mean_meters <- coffee_ratings$altitude_mean_meters
# Previously written as `coffee_ratings&ratings`, which applies `&` to an
# undefined `ratings` object instead of extracting the species column.
species <- coffee_ratings$species
country_of_origin <- coffee_ratings$country_of_origin
grading_date <- coffee_ratings$grading_date
variety <- coffee_ratings$variety
processing_method <- coffee_ratings$processing_method
aroma <- coffee_ratings$aroma
# The source column is `flavor`; the vector uses the British spelling.
flavour <- coffee_ratings$flavor
aftertaste <- coffee_ratings$aftertaste
acidity <- coffee_ratings$acidity
# The source column is `body`; the vector is named `body_coffee`.
body_coffee <- coffee_ratings$body
balance <- coffee_ratings$balance
moisture <- coffee_ratings$moisture
# The source column is `color`; the vector uses the British spelling.
colour <- coffee_ratings$color

Convert the owner column to title case

# Title-case every owner name; the result is stored separately so the
# original `owner` vector is left untouched.
owner_ii <- stringr::str_to_title(string = owner)

Create new data frame

The mill column is omitted because it contains several NA values; dropping it before removing all NA values retains an additional 95 rows of data. harvest_year is omitted because the recorded data are inconsistent, so it is excluded from the analysis (though there may be a correlation with the time difference between harvest and grading date). number_of_bags and bag_weight are omitted because they will not be included in the analysis. uniformity, clean_cup and sweetness are likewise excluded because they will not be included in the analysis.

# Assemble the analysis data set from the extracted vectors.
# tibble() replaces the deprecated data_frame(); column names are taken
# from the argument names, so the columns and their order are unchanged.
coffee_ratings_ii <- tibble(
  total_cup_points,
  owner_ii,
  altitude_mean_meters,
  species,
  country_of_origin,
  grading_date,
  variety,
  processing_method,
  aroma,
  flavour,
  aftertaste,
  acidity,
  body_coffee,
  balance,
  moisture,
  colour
)

Remove NA values from dataset

# Keep only the rows of `coffee_ratings_ii` that have no NA in any column.
coffee_ratings_tableau <- stats::na.omit(object = coffee_ratings_ii)

Export dataset for Tableau

# Write the cleaned data set to CSV for import into Tableau.
# The path is delimited with plain ASCII double quotes; typographic quotes
# are not string delimiters in R and stop the script from parsing.
write_csv(
  coffee_ratings_tableau,
  "/Users/Ballance/Documents/MSc/Unit 1/coffee_ratings_tableau.csv"
)