# Jeff Nieman R Final Project
# Pick an interesting data set (and what is more interesting than chocolate cake?).
# The CSV is fetched straight from the Rdatasets repository and the first rows are previewed.
cakefile <- "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/lme4/cake.csv"
cake <- read.table(cakefile, sep = ",", header = TRUE)
head(cake, n = 4)
## X replicate recipe temperature angle temp
## 1 1 1 A 175 42 175
## 2 2 1 A 185 46 185
## 3 3 1 A 195 47 195
## 4 4 1 A 205 39 205
# Basic transformations. Transformation #1: keep only the first five columns
# (this drops the trailing duplicate `temp` column), then preview the result.
caketest <- cake[, 1:5]
head(caketest, n = 4)
## X replicate recipe temperature angle
## 1 1 1 A 175 42
## 2 2 1 A 185 46
## 3 3 1 A 195 47
## 4 4 1 A 205 39
# Transformation #2: give the columns descriptive names, then preview the result.
names(caketest) <- c(
  "Test_Number",
  "Repeat",
  "Recipe_Creator",
  "Oven_Temperature",
  "Break_Angle"
)
head(caketest, n = 4)
## Test_Number Repeat Recipe_Creator Oven_Temperature Break_Angle
## 1 1 1 A 175 42
## 2 2 1 A 185 46
## 3 3 1 A 195 47
## 4 4 1 A 205 39
# Transformation #3: append a column with the oven temperature converted to
# Celsius via (T - 32) * 5/9, rounded to whole degrees, then preview the result.
caketest$Celsius_Temperature <- round((caketest$Oven_Temperature - 32) * 5 / 9)
head(caketest, n = 4)
## Test_Number Repeat Recipe_Creator Oven_Temperature Break_Angle
## 1 1 1 A 175 42
## 2 2 1 A 185 46
## 3 3 1 A 195 47
## 4 4 1 A 205 39
## Celsius_Temperature
## 1 79
## 2 85
## 3 91
## 4 96
# Transformation #4: Add country column, then test
# NOTE(review): this is an absolute path on the original author's machine; point
# it at your own copy of "cake countries.csv" before running.
cakecountry <- read.table("file:///C:/Users/jenieman/Documents/GitHub/R-final/cake countries.csv", header = TRUE, sep = ",")
# cbind() pairs rows purely by position and may silently recycle a shorter
# data frame, so stop here unless the country file has exactly one row per test.
stopifnot(nrow(cakecountry) == nrow(caketest))
cakenew <- cbind(caketest, cakecountry)
head(cakenew, 4)
## Test_Number Repeat Recipe_Creator Oven_Temperature Break_Angle
## 1 1 1 A 175 42
## 2 2 1 A 185 46
## 3 3 1 A 195 47
## 4 4 1 A 205 39
## Celsius_Temperature Country
## 1 79 USA
## 2 85 CHE
## 3 91 DEU
## 4 96 FRA
# Transformation #5: keep only the rows whose Celsius temperature exceeds 100,
# then preview the result.
cakefilter <- cakenew[with(cakenew, Celsius_Temperature > 100), ]
head(cakefilter, n = 4)
## Test_Number Repeat Recipe_Creator Oven_Temperature Break_Angle
## 5 5 1 A 215 53
## 6 6 1 A 225 42
## 11 11 1 B 215 55
## 12 12 1 B 225 42
## Celsius_Temperature Country
## 5 102 GBR
## 6 107 MEX
## 11 102 USA
## 12 107 CHE
# Transformation #6: change abbreviations to names for recipe creator, then test
# Recode by exact match on the whole value. Substring replacement (gsub) is not
# safe here: re-running it would turn "Anna" into "Annanna", and it depends on
# no replacement name containing a later code letter.
creator_names <- c(A = "Anna", B = "Betsy", C = "Carrie")
creator_codes <- as.character(cakenew$Recipe_Creator)
# Only values that are exactly one of the known codes are replaced; anything
# else (including already-recoded names and NA) is left untouched.
is_code <- creator_codes %in% names(creator_names)
creator_codes[is_code] <- unname(creator_names[creator_codes[is_code]])
cakenew$Recipe_Creator <- creator_codes
head(cakenew, 10)
## Test_Number Repeat Recipe_Creator Oven_Temperature Break_Angle
## 1 1 1 Anna 175 42
## 2 2 1 Anna 185 46
## 3 3 1 Anna 195 47
## 4 4 1 Anna 205 39
## 5 5 1 Anna 215 53
## 6 6 1 Anna 225 42
## 7 7 1 Betsy 175 39
## 8 8 1 Betsy 185 46
## 9 9 1 Betsy 195 51
## 10 10 1 Betsy 205 49
## Celsius_Temperature Country
## 1 79 USA
## 2 85 CHE
## 3 91 DEU
## 4 96 FRA
## 5 102 GBR
## 6 107 MEX
## 7 79 JPN
## 8 85 AUS
## 9 91 IND
## 10 96 RUS
# Key Question to investigate: What causes the larger break angles?
# Create basic summary statistics.
# Summary #1: five-number summary (plus mean) of the oven temperature.
ot <- summary(cakenew[["Oven_Temperature"]])
print(ot)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 175 185 200 200 215 225
# Summary #2: five-number summary (plus mean) of the break angle.
ba <- summary(cakenew[["Break_Angle"]])
print(ba)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 26.00 31.00 32.12 36.75 63.00
# Run some graphical analysis
# Test #1: Look at break angle histogram
# Draws a histogram of the Break_Angle column of cakenew with hist()'s default
# settings; the plot title and x-axis label are derived from the expression below.
hist(cakenew$Break_Angle)

# Observation #1: The mean break angle is 32.12 degrees. The distribution is roughly normal but slightly right-skewed, with a longer tail toward high angles (max is 63 degrees).
# Test #2: Scatter plot of break angle against oven temperature, followed by
# the correlation between the two columns.
plot(Break_Angle ~ Oven_Temperature, data = cakenew)
with(cakenew, cor(Break_Angle, Oven_Temperature))
## [1] 0.3292422
# Observation #2: The 7 highest angles all come from the 2 highest temperatures, although the highest temperature does not guarantee the highest break angle. The small positive correlation (0.329) indicates only a weak positive relationship. Need to investigate other variables as well.
# Test #3: Create a layered scatter plot testing recipe creator
# library() stops with an error if ggplot2 is not installed; require() would
# only return FALSE and let the script fail later at the ggplot() call.
library(ggplot2)

# Base plot object (break angle on x, oven temperature on y); it is reused by
# the later layered scatter plot.
g <- ggplot(cakenew, aes(x = Break_Angle, y = Oven_Temperature))
# Color each point by the recipe creator.
g + geom_point(aes(color = Recipe_Creator))

# Observation #3: At first glance it appears that Carrie's recipe may contribute to a greater break angle, as Carrie has two of the highest angles.
# Test #4: Plot break angle against recipe creators.
# Use cakenew (not caketest) so the groups are labelled with the creator names
# used in the observations, and wrap the column in factor() so a per-creator
# box plot is drawn whether the column is stored as character or factor.
plot(
  Break_Angle ~ factor(Recipe_Creator),
  data = cakenew,
  xlab = "Recipe_Creator"
)

# Observation #4: Carrie has two of the highest angles, but overall her mean is the same as Betsy's. Anna's recipe has a higher actual mean.
# Test #5: Reuse the base scatter plot `g`, this time coloring the points by
# the repeat (replicate) number of the test.
g + geom_point(mapping = aes(colour = Repeat))

# Observation #5: The later tests have the smaller break angles.
# Test #6: Look at country influence.
# library() fails immediately if rworldmap is missing, whereas require() only
# returns FALSE and lets the script fail later.
library(rworldmap)
## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
# cakenew holds many rows per country and the map join does not average them
# (NOTE(review): it appears to keep a single matching row per country - confirm
# against the rworldmap documentation). Average the break angle per country
# first so the map really shows the per-country mean.
cake_by_country <- aggregate(Break_Angle ~ Country, data = cakenew, FUN = mean)
# Join on the ISO3 codes stored in the Country column; the join prints a short
# match summary, which now counts one code per country.
cakeworld <- joinCountryData2Map(cake_by_country, joinCode = "ISO3", nameJoinColumn = "Country")
mapCountryData(cakeworld, nameColumnToPlot = "Break_Angle")

# Observation #6: Russia and India have the highest average break angles.
# Final conclusions: To get the highest break angle, the best approach appears to be an earlier test, done in Russia or India, at the highest temperature.