# Read the Sheffield temperature file (it has no header row) and name its
# columns in the same expression.
Sheff_temp <- setNames(
  read.csv("Sheffield_max_temp_data.csv", header = FALSE),
  c("Year", "Month", "Max_temp")
)

Exercise 9.1.

# Box plot of Max_temp grouped by Month. The default x-axis is switched off
# so that abbreviated month names can be drawn in its place.
boxplot(
  Max_temp ~ Month,
  data = Sheff_temp,
  main = "Average maximum monthly temperature in Sheffield, 1883 - 2014",
  xlab = "",
  ylab = "Celsius",
  xaxt = "n"
)
# las = 3 writes the labels vertically; font = 3 selects italics.
axis(1, at = seq_len(12), labels = month.abb, font = 3, las = 3)

Exercise 9.2.

# Remove rows containing missing values, then redraw the monthly box plot
# from the cleaned data.
Sheff_temp_noNA <- na.omit(Sheff_temp)
boxplot(
  Max_temp ~ Month,
  data = Sheff_temp_noNA,
  main = "Average maximum monthly temperature in Sheffield, 1883 - 2014",
  xlab = "",
  ylab = "Celsius",
  xaxt = "n"
)
axis(1, at = seq_len(12), labels = month.abb, font = 3, las = 3)

# Row holding the largest Max_temp (which.max() reports the first such row).
max_temperature <- which.max(Sheff_temp_noNA$Max_temp)
max_temperature_row <- Sheff_temp_noNA[max_temperature, ]
max_temperature_row
##      Year Month Max_temp
## 1483 2006     7     25.6
# Row holding the smallest Max_temp (which.min() reports the first such row).
min_temperature <- which.min(Sheff_temp_noNA$Max_temp)
min_temperature_row <- Sheff_temp_noNA[min_temperature, ]
min_temperature_row
##     Year Month Max_temp
## 770 1947     2     -0.6
# Mark both extremes on the box plot, using the month number as x position.
points(
  x = max_temperature_row$Month, y = max_temperature_row$Max_temp,
  col = "red", pch = 20
)
points(
  x = min_temperature_row$Month, y = min_temperature_row$Max_temp,
  col = "dodgerblue", pch = 20
)

# Label each extreme at the height of its point, in the matching colour.
text(
  x = 3.5, y = min_temperature_row$Max_temp,
  labels = "The lowest temperature",
  col = "dodgerblue", font = 3, cex = 0.6
)

text(
  x = 5.5, y = max_temperature_row$Max_temp,
  labels = "The highest temperature",
  col = "red", font = 3, cex = 0.6
)

Exercise 9.3.

# One plotting panel for the colour-coded box plot.
par(mfrow = c(1, 1))
boxplot(
  Max_temp ~ Month,
  data = Sheff_temp,
  main = "Average maximum monthly temperature \n  in Sheffield, 1883 - 2014",
  cex.main = 0.9,
  xlab = "Months",
  ylab = "Celsius",
  xaxt = "n",
  # 2 + 3 + 3 + 3 = 11 fill colours are listed for the twelve boxes.
  # NOTE(review): the twelfth box presumably reuses the first colour through
  # recycling -- confirm this is the intended December colour.
  col = rep(
    c("dodgerblue", "indianred1", "green3", "goldenrod1"),
    times = c(2, 3, 3, 3)
  )
)
axis(1, at = seq_len(12), labels = month.abb, font = 3, las = 3)
# Reference grid: a dashed vertical line per month, a solid line every 5 units.
abline(v = 1:12, col = "grey", lty = 2)
abline(h = seq(5, 25, by = 5), col = "grey")

Sheff_data <- data.frame(Sheff_temp_noNA)

# Line plot of the Month == 1 records against Year.
with(
  Sheff_temp_noNA[Sheff_temp_noNA$Month == 1, ],
  plot(
    Year, Max_temp,
    type = "l",
    col = "blue",
    main = "Average maximum monthly temperature \n in Sheffield, 1883 - 2014",
    cex.main = 0.9,
    xlab = "year",
    ylab = "Average Temperature (°C)"
  )
)

par(mfrow = c(1, 1))

Exercise 9.4.

# Read the data set and draw a scatter-plot matrix of four of its columns.
qanda <- read.csv("selective_affinities_aftermodified.csv")

# The columns are selected by name rather than through attach()/detach():
# attach() puts every column on the search path, where it can silently mask
# (or be masked by) other objects with the same name.
data_frame_9.4 <- qanda[, c("Handspan", "Siblings", "Fish", "Height")]
pairs(data_frame_9.4)

# Back to a single plotting panel.
par(mfrow = c(1, 1))

Exercise 9.5.

# mod.squ: absolute difference between the squares of `a` and `d`.
#
# Arguments:
#   a, d - numbers, or numeric vectors / matrices; the arithmetic is
#          applied element by element.
# Returns |a^2 - d^2|.
mod.squ <- function(a, d) {
  squares_gap <- a^2 - d^2
  abs(squares_gap)
}

# Two numbers passed directly.
mod.squ(a = 3, d = 6)
## [1] 27
# Two numbers passed through variables.
aaaa <- 2
dddd <- 4
mod.squ(a = aaaa, d = dddd)
## [1] 12
# Two vectors of equal length.
vector_1 <- c(1, 5, 6, 10)
vector_1
## [1]  1  5  6 10
vector_2 <- c(2, 0, 6, 3)
vector_2
## [1] 2 0 6 3
mod.squ(a = vector_1, d = vector_2)
## [1]  3 25  0 91
# Two 3 x 3 matrices (matrix() fills them column by column).
matrix_1 <- matrix(c(1, 4, 6, 2, 8, 15, 2, 5, 6), nrow = 3, ncol = 3)
matrix_1
##      [,1] [,2] [,3]
## [1,]    1    2    2
## [2,]    4    8    5
## [3,]    6   15    6
matrix_2 <- matrix(c(1, 8, 5, 9, 24, 7, 3, 4, 12), nrow = 3, ncol = 3)
matrix_2
##      [,1] [,2] [,3]
## [1,]    1    9    3
## [2,]    8   24    4
## [3,]    5    7   12
mod.squ(a = matrix_1, d = matrix_2)
##      [,1] [,2] [,3]
## [1,]    0   77    5
## [2,]   48  512    9
## [3,]   11  176  108

When the inputs are vectors or matrices of the same size, the function works element-wise: each element of the output is calculated from the elements at the same position in the two inputs.

Exercise 9.6.

# Sheff.mth.plot: line plot of one month's Max_temp values against Year.
#
# Arguments:
#   mth  - the month as a single whole number from 1 to 12.
#   data - data frame with columns Year, Month and Max_temp. Defaults to the
#          script-level Sheff_temp_noNA, so existing one-argument calls are
#          unaffected.
# Returns NULL invisibly; the function is called for the plot it draws.
# Stops with an error when `mth` is not a valid month number or when `data`
# holds no rows for that month.
Sheff.mth.plot <- function(mth, data = Sheff_temp_noNA) {
  stopifnot(
    is.numeric(mth),
    length(mth) == 1L,
    !is.na(mth),
    mth %in% seq_len(12)
  )

  # Select by column name rather than by position, so a reordered data frame
  # cannot silently filter on the wrong column.
  mth_data <- data[data$Month == mth, ]
  if (nrow(mth_data) == 0L) {
    stop("no records found for month ", mth, call. = FALSE)
  }

  plot(
    x = mth_data$Year, y = mth_data$Max_temp, type = "l",
    main = month.name[mth], xlab = "year", ylab = "temperature"
  )
  invisible(NULL)
}

# Three stacked panels: months 2, 5 and 8, drawn in that order.
par(mfrow = c(3, 1))
invisible(lapply(c(2, 5, 8), Sheff.mth.plot))

Exercise 9.7.

library(plotrix)
# Single panel, then the seven values plotted against their index.
par(mfrow = c(1, 1))
Trantor_Uni <- c(87, 88, 87, 88, 88, 89, 87)
# NOTE(review): the values appear on the y-axis and their index on the
# x-axis, yet the x-axis carries the label "Satisfaction %" -- confirm the
# two axis labels are the intended way round.
plot(
  x = Trantor_Uni,
  type = "o",       # points over-plotted on the connecting line
  pch = ".",        # plotting symbol
  lty = 1,          # line type
  lwd = 3,          # line width
  col = "red",
  ylim = c(80, 92),
  yaxt = "n",       # default y-axis suppressed; drawn by axis() below
  xlab = "Satisfaction %",
  ylab = "percentage",
  main = "Trantor University student satisfaction"
)

# Tick marks at every whole number from 80 to 92, and a zigzag break symbol
# on the y-axis at 80.
axis(side = 2, at = seq(80, 92))
axis.break(axis = 2, breakpos = 80, style = "zigzag")

Exercise 9.8.

The purpose of a violin plot: it is used to visualise the distribution of numerical data.

library(ggplot2)

# Violin plot of Max_temp for each month.
# attach()/detach() are not needed: ggplot() looks up Month and Max_temp in
# the data frame passed as its first argument.
ggplot(
  Sheff_temp_noNA,
  aes(
    x = factor(Month, labels = month.abb),
    y = Max_temp,
    fill = factor(Month)
  ) ##### Maps the month to the x-axis,
  ##### Max_temp to the y-axis,
  ##### and uses the fill aesthetic for different months.
) +
  geom_violin(trim = FALSE, alpha = 0.8) + ### Creates a violin plot.
  ### trim = FALSE: do not cut the tails of the violins at the range of the data
  ### alpha: controls the transparency of the violins
  ### higher value of alpha -> less transparent
  scale_x_discrete(name = "Month") + ###  Sets the axis labels.
  scale_y_continuous(name = "Mean Temperature") +
  ggtitle("Violin plot of Monthly Mean Temperature of Sheffield") + ###  Gives the plot title.
  theme_minimal() + ###  Sets the plot theme to a minimal style.
  theme(panel.background = element_rect(fill = "pink")) ###  Sets the background color of the plot to pink

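A violin plot can: show the full shape of each month's distribution (for example skewness or more than one peak), which a box plot summarises away.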

Exercise 9.9.

See Exercise 9.3 in this document; the plot there has already been improved.

Exercise 9.10.

The plot for Exercise 6.4: cocoa prices from 2012 to 2014.
# add the price and mean lines for all three years using
# the a and b arguments for abline(), legend, text and axis features.

# Monthly cocoa prices for 2012-2014: one line per year, each year's mean as
# a dotted horizontal line with a label, plus a legend.

# The first column is sliced by character position: characters 1-3 are used
# as the month and characters 5-6 as the two-digit year (presumably values
# such as "Jan-12" -- confirm against the CSV).
cocoa <- read.csv("Cocoa_prices.csv", header = FALSE)
Mths <- substr(cocoa$V1, start = 1, stop = 3)
# paste0(), not paste(): the default sep = " " would give "20 12".
Yrs <- paste0("20", substr(cocoa$V1, start = 5, stop = 6))
cocoa_updated <- data.frame(Yrs, Mths, cocoa$V2)
Price <- cocoa_updated$cocoa.V2
cocoa_updated_price <- data.frame(Yrs, Mths, Price)

# One row per month, one price column per year.
cocoa_wide <- reshape(
  cocoa_updated_price,
  timevar = "Yrs", idvar = "Mths", direction = "wide"
)
colnames(cocoa_wide) <- c(colnames(cocoa_wide)[1], "2012", "2013", "2014")

# Everything that differs between the three years lives in one table, so the
# data line, mean line, mean label and legend entry of a year always share
# the same colour.
cocoa_series <- data.frame(
  year = c("2012", "2013", "2014"),
  colour = c("black", "deepskyblue", "red"),
  symbol = c(1, 15, 20),        # pch used for the monthly points
  label_x = c(10, 4, 8),        # x position of the mean label
  label_dy = c(-50, 50, 50),    # vertical offset of the label from the mean
  stringsAsFactors = FALSE
)

##### set up the empty axes; the series are added in the loop below
plot(
  cocoa_wide[["2012"]],
  type = "n",
  xlab = "months", ylab = "price ($)", ylim = c(2100, 3300),
  main = "Cocoa Prices from 2012 to 2014",
  xaxt = "n", bty = "l"
)

months <- as.character(cocoa_wide$Mths)
axis(side = 1, at = seq_along(months), labels = months)

for (i in seq_len(nrow(cocoa_series))) {
  yr <- cocoa_series$year[i]
  shade <- cocoa_series$colour[i]
  prices <- cocoa_wide[[yr]]
  yr_mean <- mean(prices)

  # monthly prices: points joined by a solid line
  points(prices, col = shade, pch = cocoa_series$symbol[i])
  lines(prices, col = shade, lty = 1, lwd = 3)

  # annual mean: dotted horizontal line
  abline(h = yr_mean, col = shade, lty = 3, lwd = 2)

  # Mean label. bquote() builds a plotmath expression: mu is shown as the
  # Greek letter, [ ] makes its content a subscript, quoted text is left
  # alone, and .( ) inserts the value of an R expression.
  text(
    x = cocoa_series$label_x[i],
    y = yr_mean + cocoa_series$label_dy[i],
    labels = bquote(paste(
      "mean:", mu[.(as.numeric(yr))], " = $", .(round(yr_mean, 0))
    )),
    font = 3, col = shade, cex = 0.8
  )
}

# add the legend and text
legend(
  x = 10, y = 2300, legend = cocoa_series$year,
  col = cocoa_series$colour,
  lty = rep(1, times = nrow(cocoa_series)), cex = 0.8
)
text(
  x = 3, y = 2700,
  labels = "The dotted lines are annual means",
  font = 3, col = "black"
)

  • Don’t forget to check that every opening bracket has a matching closing bracket, to avoid silly errors.

Exercise 9.11.

# Three histograms of Handspan, each overlaid with a kernel density estimate
# whose bandwidth is scaled by `adjust` (1, 0.5, then 0.25 times the default).
#
# probability = TRUE puts densities on the y-axis, so the axis is labelled
# "density" (the argument is also spelled out instead of the partially
# matched `prob`).
# na.rm = TRUE lets density() skip missing values instead of stopping with an
# error; it changes nothing when there are none.
# NOTE(review): the titles quote bandwidths of 2, 1 and 0.5, which assumes the
# default bandwidth chosen by density() is 2 for this data -- confirm.

hist(qanda$Handspan,
  probability = TRUE, col = "lightblue",
  main = "Handspan - default bins \n bandwidth = 2",
  xlab = "handspan", ylab = "density"
)
lines(density(qanda$Handspan, na.rm = TRUE), col = "red", lwd = 2)

# A single number for `breaks` is the suggested number of cells.
hist(qanda$Handspan,
  probability = TRUE, breaks = 8 / 0.5, col = "lightblue",
  main = "Handspan - bandwidth = 1",
  xlab = "handspan", ylab = "density"
)
lines(density(qanda$Handspan, adjust = 0.5, na.rm = TRUE), col = "red", lwd = 2)

hist(qanda$Handspan,
  probability = TRUE, breaks = 8 / 0.25, col = "lightblue",
  main = "Handspan - bandwidth = 0.5",
  xlab = "handspan", ylab = "density"
)
lines(density(qanda$Handspan, adjust = 0.25, na.rm = TRUE), col = "red", lwd = 2)

Exercise 9.12

We can use a Mosaic plot:

# Bring the built-in HairEyeColor contingency table into the workspace.
data("HairEyeColor")

# Despite the "_df" suffix this object is a table, not a data frame.
HairEyeColor_df <- as.table(HairEyeColor)
par(mfrow = c(1, 1))
### Mosaic plot of the three classifying factors, shaded in greys.
mosaicplot(
  ~ Hair + Eye + Sex,
  data = HairEyeColor_df,
  main = "HairEyeColor Dataset",
  color = c("grey100", "grey80", "grey60", "grey40", "grey20", "black")
)