# Read the Sheffield temperature file. It is read without a header row,
# so the three columns are named explicitly afterwards.
Sheff_temp <- read.csv(file = "Sheffield_max_temp_data.csv", header = FALSE)
names(Sheff_temp) <- c("Year", "Month", "Max_temp")

# Box plot of Max_temp grouped by Month. The default x-axis is suppressed
# (xaxt = "n") so that abbreviated month names can be drawn instead,
# perpendicular to the axis (las = 3) and in italics (font = 3).
boxplot(Max_temp ~ Month, data = Sheff_temp,
        main = "Average maximum monthly temperature in Sheffield, 1883 - 2014",
        xlab = "", ylab = "Celsius", xaxt = "n")
axis(side = 1, at = 1:12, labels = month.abb, las = 3, font = 3)

# Drop every row that contains a missing value and draw the same plot again
# from the cleaned data.
Sheff_temp_noNA <- na.omit(Sheff_temp)
boxplot(Max_temp ~ Month, data = Sheff_temp_noNA,
        main = "Average maximum monthly temperature in Sheffield, 1883 - 2014",
        xlab = "", ylab = "Celsius", xaxt = "n")
axis(side = 1, at = 1:12, labels = month.abb, las = 3, font = 3)
# Locate the rows holding the largest and smallest Max_temp values.
# which.max() / which.min() return the position of the first extreme found.
max_temperature <- which.max(Sheff_temp_noNA$Max_temp)
# Ordinary row subset: one row, all columns, kept as a data frame.
max_temperature_row <- Sheff_temp_noNA[max_temperature, ]
max_temperature_row
## Year Month Max_temp
## 1483 2006 7 25.6
min_temperature <- which.min(Sheff_temp_noNA$Max_temp)
min_temperature_row <- Sheff_temp_noNA[min_temperature, ]
min_temperature_row
## Year Month Max_temp
## 770 1947 2 -0.6

# Mark both extremes on the currently open plot; the x position is the
# month number of the extreme row.
points(max_temperature_row$Month, max_temperature_row$Max_temp,
       pch = 20, col = "red")
points(min_temperature_row$Month, min_temperature_row$Max_temp,
       pch = 20, col = "dodgerblue")

# Label each marker 1.5 month-positions to the side of it (right of the
# minimum, left of the maximum). The offset is relative to the located
# month, so the labels follow the markers if the data change.
text(min_temperature_row$Month + 1.5, min_temperature_row$Max_temp,
     "The lowest temperature",
     font = 3, col = "dodgerblue", cex = 0.6)
text(max_temperature_row$Month - 1.5, max_temperature_row$Max_temp,
     "The highest temperature",
     font = 3, col = "red", cex = 0.6)
# Single-panel layout.
par(mfrow = c(1, 1))

# Box plot of Max_temp by Month with one fill colour per box.
# The colour vector spells out all 12 boxes: Jan-Feb, Mar-May, Jun-Aug,
# Sep-Nov and finally Dec, which shares the Jan-Feb colour. Listing
# December explicitly avoids relying on silent recycling of a
# too-short colour vector.
boxplot(Max_temp ~ Month, data = Sheff_temp,
        xlab = "Months", ylab = "Celsius", xaxt = "n",
        main = "Average maximum monthly temperature \n in Sheffield, 1883 - 2014",
        col = rep(
          c("dodgerblue", "indianred1", "green3", "goldenrod1", "dodgerblue"),
          times = c(2, 3, 3, 3, 1)
        ),
        cex.main = 0.9)

# Abbreviated month names on the x-axis, perpendicular and in italics.
axis(side = 1, at = 1:12, labels = month.abb, las = 3, font = 3)

# Reference grid: a dashed vertical line per month and a solid horizontal
# line every 5 degrees from 5 to 25.
abline(v = 1:12, col = "grey", lty = 2)
abline(h = seq(5, 25, 5), col = "grey")
# Copy of the cleaned data as a data frame.
Sheff_data <- data.frame(Sheff_temp_noNA)

# Line plot of the rows with Month == 1: Max_temp against Year.
# The subset is taken once and the plot is evaluated inside it.
with(
  Sheff_temp_noNA[Sheff_temp_noNA$Month == 1, ],
  plot(Year, Max_temp,
       type = "l",
       col = "blue",
       main = "Average maximum monthly temperature \n in Sheffield, 1883 - 2014",
       cex.main = 0.9,
       xlab = "year",
       ylab = "Average Temperature (°C)")
)

# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Read the questionnaire data (first row is the header).
qanda <- read.csv("selective_affinities_aftermodified.csv")

# Keep the four columns of interest. Selecting them by name from qanda
# avoids attach()/detach(), which put the columns on the search path where
# they can mask, or be masked by, other objects.
data_frame_9.4 <- qanda[, c("Handspan", "Siblings", "Fish", "Height")]

# Scatterplot matrix of every pair of the selected columns.
pairs(data_frame_9.4)

# Back to a single-panel layout.
par(mfrow = c(1, 1))
# Absolute difference of two squares, |a^2 - d^2|.
#
# Arguments:
#   a, d  numeric values. The arithmetic is element-wise, so vectors or
#         matrices of matching size give a result of the same shape.
# Returns:
#   abs(a^2 - d^2)
mod.squ <- function(a, d) {
  abs(a^2 - d^2)
}
# Two literal numbers.
mod.squ(3, 6)
## [1] 27

# Two numbers held in variables.
aaaa <- 2
dddd <- 4
mod.squ(aaaa, dddd)
## [1] 12

# Two vectors of equal length: the result has one value per position.
vector_1 <- c(1, 5, 6, 10)
vector_1
## [1] 1 5 6 10
vector_2 <- c(2, 0, 6, 3)
vector_2
## [1] 2 0 6 3
mod.squ(vector_1, vector_2)
## [1] 3 25 0 91

# Two 3 x 3 matrices, each built column by column: the result is a
# matrix of the same shape.
matrix_1 <- cbind(c(1, 4, 6), c(2, 8, 15), c(2, 5, 6))
matrix_1
## [,1] [,2] [,3]
## [1,] 1 2 2
## [2,] 4 8 5
## [3,] 6 15 6
matrix_2 <- cbind(c(1, 8, 5), c(9, 24, 7), c(3, 4, 12))
matrix_2
## [,1] [,2] [,3]
## [1,] 1 9 3
## [2,] 8 24 4
## [3,] 5 7 12
mod.squ(matrix_1, matrix_2)
## [,1] [,2] [,3]
## [1,] 0 77 5
## [2,] 48 512 9
## [3,] 11 176 108
When the inputs are vectors or matrices of the same size, the function works element-wise: each element of the output corresponds to the elements at the same position in the two inputs.
# Line plot of Max_temp against Year for a single calendar month.
#
# Arguments:
#   mth   a single whole number from 1 to 12 identifying the month
#   data  data frame with columns Year, Month and Max_temp; defaults to the
#         script-level Sheff_temp_noNA, so existing one-argument calls
#         behave as before
# Draws on the active graphics device, titled with the full month name.
# Returns the (invisible) value of plot().
# Stops with an error when mth is not a valid month number or when data
# holds no rows for that month.
Sheff.mth.plot <- function(mth, data = Sheff_temp_noNA) {
  stopifnot(is.numeric(mth), length(mth) == 1L, mth %in% 1:12)
  # which() discards NA comparisons, so rows with a missing Month are
  # skipped rather than turned into all-NA rows.
  mth_data <- data[which(data$Month == mth), ]
  if (nrow(mth_data) == 0L) {
    stop("no rows found for month ", mth, call. = FALSE)
  }
  plot(x = mth_data$Year, y = mth_data$Max_temp, type = "l",
       main = month.name[mth], xlab = "year", ylab = "temperature")
}
# Three panels stacked vertically: months 2, 5 and 8, drawn in that order.
par(mfrow = c(3, 1))
invisible(lapply(c(2, 5, 8), Sheff.mth.plot))
library(plotrix)
# Single-panel layout.
par(mfrow = c(1, 1))

# Seven satisfaction percentages, plotted against their position (1 to 7).
Trantor_Uni <- c(87, 88, 87, 88, 88, 89, 87)

plot(Trantor_Uni,
     main = "Trantor University student satisfaction",
     xlab = "Satisfaction %",   # x-axis label
     ylab = "percentage",       # y-axis label
     ylim = c(80, 92),          # y range shown
     yaxt = "n",                # no default y-axis; drawn by axis() below
     type = "o",                # points and lines overplotted
     pch = ".",                 # plotting symbol
     lty = 1, lwd = 3,          # line type and line width
     col = "red")               # colour of points and lines

# y-axis with a tick at every whole percent from 80 to 92.
axis(2, at = 80:92)

# Zigzag break mark on the y-axis at 80 (plotrix), signalling that the axis
# does not start at zero.
axis.break(2, breakpos = 80, style = "zigzag")
The purpose of a violin plot: it is used to visualize the distribution of numerical data.
library(ggplot2)

# Violin plot of Max_temp for each month.
# The columns are looked up in the data frame handed to ggplot(), so no
# attach()/detach() is needed.
ggplot(
  Sheff_temp_noNA,
  aes(
    # Month on the x-axis, shown as abbreviated month names. The levels are
    # given explicitly so the labels stay aligned with the month numbers
    # even if some month is absent from the data.
    x = factor(Month, levels = 1:12, labels = month.abb),
    # Max_temp on the y-axis.
    y = Max_temp,
    # One fill colour per month.
    fill = factor(Month)
  )
) +
  # trim = FALSE keeps the tails of each violin instead of cutting them at
  # the data range; alpha sets opacity (higher value -> less transparent).
  geom_violin(trim = FALSE, alpha = 0.8) +
  # Axis titles.
  scale_x_discrete(name = "Month") +
  scale_y_continuous(name = "Mean Temperature") +
  # Plot title.
  ggtitle("Violin plot of Monthly Mean Temperature of Sheffield") +
  # Minimal theme, then a pink panel background on top of it.
  theme_minimal() +
  theme(panel.background = element_rect(fill = "pink"))
A violin plot can:
See Exercise 9.3 in this document, where the plot has been improved.
# Exercise brief: add the price and mean lines for all three years using
# the a and b arguments for abline(), legend, text and axis features.

# ---- Data preparation ----
cocoa <- read.csv("Cocoa_prices.csv", header = FALSE)

# Column V1 is split by character position: 1-3 for the month and 5-6 for a
# two-digit year (presumably labels such as "Jan-12" -- TODO confirm).
Mths <- substr(cocoa$V1, start = 1, stop = 3)
# paste0() joins without a separator, giving "2012"; paste() would insert a
# space and give "20 12".
Yrs <- paste0("20", substr(cocoa$V1, start = 5, stop = 6))
cocoa_updated <- data.frame(Yrs, Mths, cocoa$V2)
Price <- cocoa_updated$cocoa.V2
cocoa_updated_price <- data.frame(Yrs, Mths, Price)

# Long to wide: one row per month, one price column per year.
cocoa_wide <- reshape(cocoa_updated_price, timevar = "Yrs", idvar = "Mths",
                      direction = "wide")
# reshape() names the price columns "Price.<year>". Stripping the prefix
# leaves the year itself, so each column is named after the data it holds
# rather than after its position.
colnames(cocoa_wide) <- sub("^Price\\.", "", colnames(cocoa_wide))

# Annual mean price per year (missing months, if any, are ignored).
price_years <- c("2012", "2013", "2014")
annual_mean <- vapply(cocoa_wide[price_years], mean, numeric(1), na.rm = TRUE)

# ---- Price lines and dotted mean lines ----
# 2012 opens the plot; the default x-axis is replaced by month labels.
plot(cocoa_wide[["2012"]],
     xlab = "months", ylab = "price ($)", ylim = c(2100, 3300),
     main = "Cocoa Prices from 2012 to 2014",
     col = "black",
     xaxt = "n", bty = "l")
months <- as.character(cocoa_wide$Mths)
axis(side = 1, at = seq_along(months), labels = months)
lines(cocoa_wide[["2012"]], col = "black", lwd = 3)
abline(h = annual_mean[["2012"]], lty = 3, lwd = 2, col = "black")

# 2013
points(cocoa_wide[["2013"]], col = "deepskyblue", pch = 15)
lines(cocoa_wide[["2013"]], col = "deepskyblue", lty = 1, lwd = 3)
abline(h = annual_mean[["2013"]], lty = 3, lwd = 2, col = "blue")

# 2014
points(cocoa_wide[["2014"]], col = "red", pch = 20)
lines(cocoa_wide[["2014"]], col = "red", lty = 1, lwd = 3)
abline(h = annual_mean[["2014"]], lty = 3, lwd = 2, col = "red")

# ---- Legend and annotations ----
# The legend describes the solid price lines, so it uses their colours.
legend(x = 10, y = 2300, legend = price_years,
       col = c("black", "deepskyblue", "red"),
       lty = rep(1, times = 3), cex = 0.8)
text(x = 3, y = 2700,
     labels = "The dotted lines are annual means",
     font = 3, col = "black")

# Write "mean: mu[year] = $value" next to one year's dotted mean line.
#   year      column name in cocoa_wide, e.g. "2012"
#   x         horizontal position of the label
#   y_offset  vertical shift of the label relative to the mean line
#   col       text colour
# substitute() builds a plotmath expression: mu is drawn as the Greek
# letter, [ ] makes its content a subscript, and quoted text is left as is.
label_annual_mean <- function(year, x, y_offset, col) {
  text(x = x, y = annual_mean[[year]] + y_offset,
       labels = substitute(
         paste("mean:", mu[yr], " = $", a),
         list(yr = as.numeric(year), a = round(annual_mean[[year]], 0))
       ),
       font = 3, col = col, cex = 0.8)
}
label_annual_mean("2012", x = 10, y_offset = -50, col = "black")
label_annual_mean("2013", x = 4, y_offset = 50, col = "blue")
label_annual_mean("2014", x = 8, y_offset = 50, col = "red")
# Three density-scaled histograms of Handspan, each overlaid with a kernel
# density curve. Moving down, the histogram is given more suggested bins
# (breaks) and the density curve a smaller smoothing multiplier (adjust).
#
# prob = TRUE scales the bars to densities (total area 1), so the y-axis is
# labelled "density". na.rm = TRUE lets density() skip missing values,
# which hist() already ignores.

# Default bins, default smoothing.
hist(qanda$Handspan, prob = TRUE, col = "lightblue",
     main = "Handspan - default bins \n bandwidth = 2",
     xlab = "handspan", ylab = "density")
lines(density(qanda$Handspan, na.rm = TRUE), col = "red", lwd = 2)

# About 16 bins, half the default smoothing.
hist(qanda$Handspan, prob = TRUE, breaks = 8 / 0.5, col = "lightblue",
     main = "Handspan - bandwidth = 1",
     xlab = "handspan", ylab = "density")
lines(density(qanda$Handspan, adjust = 0.5, na.rm = TRUE),
      col = "red", lwd = 2)

# About 32 bins, a quarter of the default smoothing.
hist(qanda$Handspan, prob = TRUE, breaks = 8 / 0.25, col = "lightblue",
     main = "Handspan - bandwidth = 0.5",
     xlab = "handspan", ylab = "density")
lines(density(qanda$Handspan, adjust = 0.25, na.rm = TRUE),
      col = "red", lwd = 2)
We can use a mosaic plot:
# Load the built-in HairEyeColor contingency table and keep it as a table
# object (despite the "_df" suffix this is not a data frame).
data("HairEyeColor")
HairEyeColor_df <- as.table(HairEyeColor)

# Single-panel layout.
par(mfrow = c(1, 1))

# Mosaic plot of the Hair x Eye x Sex counts, shaded with a grey scale.
# For table input only the right-hand side of the formula selects the
# variables, so a one-sided formula is used.
grey_shades <- c("grey100", "grey80", "grey60", "grey40", "grey20", "black")
mosaicplot(~ Hair + Eye + Sex,
           data = HairEyeColor_df,
           main = "HairEyeColor Dataset",
           color = grey_shades)