Tutorial 3: Using the R Console

# 1) this loads an internal dataset in R
data("mtcars")
summary(mtcars)

##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000

# 2) this loads the correlogram package from your library
require(corrplot)

## Loading required package: corrplot

## corrplot 0.95 loaded

## 3) calculate the correlation between all columns
corr_matrix <- cor(mtcars)

## 4) show the correlation with circles
corrplot(corr_matrix)

## with numbers instead of circles
corrplot(corr_matrix, method = "number", type = "lower")

Tutorial 4: R as an Overgrown Calculator

The basics of R includes:
1. How to do basic arithmetic
2. How to create, save and manipulate objects
3. How to load data from text and excel files
4. How to find and use functions
5. How to plot data, and change labels, axes and alter colors

# show help for the setwd function (only if the package is loaded)
?setwd

# search for anything containing "setwd" across ALL packages (loaded or not)
# useful when you don't know the exact function name or which package it's in
??setwd

# demonstration function -> code demonstrations on how to do all kinds of things
# e.g. demo for some graphical features to create stunning graphics
demo(graphics)

## 
## 
##  demo(graphics)
##  ---- ~~~~~~~~
## 
## > #  Copyright (C) 1997-2009 The R Core Team
## > 
## > require(datasets)
## 
## > require(grDevices); require(graphics)
## 
## > ## Here is some code which illustrates some of the differences between
## > ## R and S graphics capabilities.  Note that colors are generally specified
## > ## by a character string name (taken from the X11 rgb.txt file) and that line
## > ## textures are given similarly.  The parameter "bg" sets the background
## > ## parameter for the plot and there is also an "fg" parameter which sets
## > ## the foreground color.
## > 
## > 
## > x <- stats::rnorm(50)
## 
## > opar <- par(bg = "white")
## 
## > plot(x, ann = FALSE, type = "n")

## 
## > abline(h = 0, col = gray(.90))
## 
## > lines(x, col = "green4", lty = "dotted")
## 
## > points(x, bg = "limegreen", pch = 21)
## 
## > title(main = "Simple Use of Color In a Plot",
## +       xlab = "Just a Whisper of a Label",
## +       col.main = "blue", col.lab = gray(.8),
## +       cex.main = 1.2, cex.lab = 1.0, font.main = 4, font.lab = 3)
## 
## > ## A little color wheel.    This code just plots equally spaced hues in
## > ## a pie chart.    If you have a cheap SVGA monitor (like me) you will
## > ## probably find that numerically equispaced does not mean visually
## > ## equispaced.  On my display at home, these colors tend to cluster at
## > ## the RGB primaries.  On the other hand on the SGI Indy at work the
## > ## effect is near perfect.
## > 
## > par(bg = "gray")
## 
## > pie(rep(1,24), col = rainbow(24), radius = 0.9)

## 
## > title(main = "A Sample Color Wheel", cex.main = 1.4, font.main = 3)
## 
## > title(xlab = "(Use this as a test of monitor linearity)",
## +       cex.lab = 0.8, font.lab = 3)
## 
## > ## We have already confessed to having these.  This is just showing off X11
## > ## color names (and the example (from the postscript manual) is pretty "cute".
## > 
## > pie.sales <- c(0.12, 0.3, 0.26, 0.16, 0.04, 0.12)
## 
## > names(pie.sales) <- c("Blueberry", "Cherry",
## +              "Apple", "Boston Cream", "Other", "Vanilla Cream")
## 
## > pie(pie.sales,
## +     col = c("purple","violetred1","green3","cornsilk","cyan","white"))

## 
## > title(main = "January Pie Sales", cex.main = 1.8, font.main = 1)
## 
## > title(xlab = "(Don't try this at home kids)", cex.lab = 0.8, font.lab = 3)
## 
## > ## Boxplots:  I couldn't resist the capability for filling the "box".
## > ## The use of color seems like a useful addition, it focuses attention
## > ## on the central bulk of the data.
## > 
## > par(bg="cornsilk")
## 
## > n <- 10
## 
## > g <- gl(n, 100, n*100)
## 
## > x <- rnorm(n*100) + sqrt(as.numeric(g))
## 
## > boxplot(split(x,g), col="lavender", notch=TRUE)

## 
## > title(main="Notched Boxplots", xlab="Group", font.main=4, font.lab=1)
## 
## > ## An example showing how to fill between curves.
## > 
## > par(bg="white")
## 
## > n <- 100
## 
## > x <- c(0,cumsum(rnorm(n)))
## 
## > y <- c(0,cumsum(rnorm(n)))
## 
## > xx <- c(0:n, n:0)
## 
## > yy <- c(x, rev(y))
## 
## > plot(xx, yy, type="n", xlab="Time", ylab="Distance")

## 
## > polygon(xx, yy, col="gray")
## 
## > title("Distance Between Brownian Motions")
## 
## > ## Colored plot margins, axis labels and titles.    You do need to be
## > ## careful with these kinds of effects.    It's easy to go completely
## > ## over the top and you can end up with your lunch all over the keyboard.
## > ## On the other hand, my market research clients love it.
## > 
## > x <- c(0.00, 0.40, 0.86, 0.85, 0.69, 0.48, 0.54, 1.09, 1.11, 1.73, 2.05, 2.02)
## 
## > par(bg="lightgray")
## 
## > plot(x, type="n", axes=FALSE, ann=FALSE)

## 
## > usr <- par("usr")
## 
## > rect(usr[1], usr[3], usr[2], usr[4], col="cornsilk", border="black")
## 
## > lines(x, col="blue")
## 
## > points(x, pch=21, bg="lightcyan", cex=1.25)
## 
## > axis(2, col.axis="blue", las=1)
## 
## > axis(1, at=1:12, lab=month.abb, col.axis="blue")
## 
## > box()
## 
## > title(main= "The Level of Interest in R", font.main=4, col.main="red")
## 
## > title(xlab= "1996", col.lab="red")
## 
## > ## A filled histogram, showing how to change the font used for the
## > ## main title without changing the other annotation.
## > 
## > par(bg="cornsilk")
## 
## > x <- rnorm(1000)
## 
## > hist(x, xlim=range(-4, 4, x), col="lavender", main="")

## 
## > title(main="1000 Normal Random Variates", font.main=3)
## 
## > ## A scatterplot matrix
## > ## The good old Iris data (yet again)
## > 
## > pairs(iris[1:4], main="Edgar Anderson's Iris Data", font.main=4, pch=19)

## 
## > pairs(iris[1:4], main="Edgar Anderson's Iris Data", pch=21,
## +       bg = c("red", "green3", "blue")[unclass(iris$Species)])

## 
## > ## Contour plotting
## > ## This produces a topographic map of one of Auckland's many volcanic "peaks".
## > 
## > x <- 10*1:nrow(volcano)
## 
## > y <- 10*1:ncol(volcano)
## 
## > lev <- pretty(range(volcano), 10)
## 
## > par(bg = "lightcyan")
## 
## > pin <- par("pin")
## 
## > xdelta <- diff(range(x))
## 
## > ydelta <- diff(range(y))
## 
## > xscale <- pin[1]/xdelta
## 
## > yscale <- pin[2]/ydelta
## 
## > scale <- min(xscale, yscale)
## 
## > xadd <- 0.5*(pin[1]/scale - xdelta)
## 
## > yadd <- 0.5*(pin[2]/scale - ydelta)
## 
## > plot(numeric(0), numeric(0),
## +      xlim = range(x)+c(-1,1)*xadd, ylim = range(y)+c(-1,1)*yadd,
## +      type = "n", ann = FALSE)

## 
## > usr <- par("usr")
## 
## > rect(usr[1], usr[3], usr[2], usr[4], col="green3")
## 
## > contour(x, y, volcano, levels = lev, col="yellow", lty="solid", add=TRUE)
## 
## > box()
## 
## > title("A Topographic Map of Maunga Whau", font= 4)
## 
## > title(xlab = "Meters North", ylab = "Meters West", font= 3)
## 
## > mtext("10 Meter Contour Spacing", side=3, line=0.35, outer=FALSE,
## +       at = mean(par("usr")[1:2]), cex=0.7, font=3)
## 
## > ## Conditioning plots
## > 
## > par(bg="cornsilk")
## 
## > coplot(lat ~ long | depth, data = quakes, pch = 21, bg = "green3")

## 
## > par(opar)

# get reproducible code examples for any function
example(mean)

## 
## mean> x <- c(0:10, 50)
## 
## mean> xm <- mean(x)
## 
## mean> c(xm, mean(x, trim = 0.10))
## [1] 8.75 5.50

# look up objects online in several R web resources
# RSiteSearch("spearman correlation")

# R can do simple arithmetic using + for addition, - for subtraction, / for division 
# and * for multiplication
1 + 1

## [1] 2

2 - 3

## [1] -1

4 / 5

## [1] 0.8

3 * 2

## [1] 6

# Exercise 1
answer1 <- 5678 - 1234
print(paste("The answer to Exercise 1 is ", answer1,"."))

## [1] "The answer to Exercise 1 is  4444 ."

# finding out what rev() function does -> it reverses the vector or other object defined
?rev

# QRS poem
QRSpoem <- c(
"I can't grasp these concepts.",
"So don't assume that,",
"I am capable of mastering this subject.",
"Whenever I give it a shot,",
"I become lost and overwhelmed.",
"And I don't see how,",
"There's a spark of curiosity in me.",
"So remember before you judge,",
"That I'm just not cut out for this.",
"And no amount of encouragement will convince me,",
"I have potential to excel.",
"Each setback whispers to me,",
"I'm just not smart enough.",
"It's hard to believe that,",
"There's a mathematician, coder, or statistician inside of me.",
"Because every challenge makes me wonder;",
"Can I truly become proficient?"
)
rev(QRSpoem)

##  [1] "Can I truly become proficient?"                               
##  [2] "Because every challenge makes me wonder;"                     
##  [3] "There's a mathematician, coder, or statistician inside of me."
##  [4] "It's hard to believe that,"                                   
##  [5] "I'm just not smart enough."                                   
##  [6] "Each setback whispers to me,"                                 
##  [7] "I have potential to excel."                                   
##  [8] "And no amount of encouragement will convince me,"             
##  [9] "That I'm just not cut out for this."                          
## [10] "So remember before you judge,"                                
## [11] "There's a spark of curiosity in me."                          
## [12] "And I don't see how,"                                         
## [13] "I become lost and overwhelmed."                               
## [14] "Whenever I give it a shot,"                                   
## [15] "I am capable of mastering this subject."                      
## [16] "So don't assume that,"                                        
## [17] "I can't grasp these concepts."

# calculating the expected mean temperature of Earth in degrees C if there was no atmosphere
# Fs is the total energy incoming from the sun to earth which is currently 1368 Watts per square meter (per second)
F_S <- 1368
# A is the solar reflection of earth (earth’s albedo which is 0.29)
A <- 0.29 
# σ is Stefan-Boltzmann constant (5.67×10−8)
σ <- 5.67*10^(-8)
# the 1/4 power corrects for the spherical and rotating nature of earth and
# the−273.15 converts Kelvin to Celsius

# now that the variables are defined, they can be plucked into the equation
T_E <- ((F_S*(1-A)/(4*σ))^(1/4))-273.15
print(paste("The expected mean temperature of Earth if there was no atmosphere is (and therefore the answer
            to Exercise 2) ", T_E, "degrees Celsius."))

## [1] "The expected mean temperature of Earth if there was no atmosphere is (and therefore the answer\n            to Exercise 2)  -17.3353871515439 degrees Celsius."

# hint: with complicated formulas in R, it is wise to use the object-oriented system which 
# basically just means save parts of the formula as objects and do stepwise calculations
P_1 <- F_S * (1-A)
P_2 <- 4*σ
Step1 <- P_1 / P_2
Step2 <- Step1^(1/4)
Step3 <- Step2 - 273.15
print(paste("Using the object-oriented method, we get the same answer to Exercise 2 of ", Step3, 
            "degrees Celsius."))

## [1] "Using the object-oriented method, we get the same answer to Exercise 2 of  -17.3353871515439 degrees Celsius."

Tutorial 5: Loading, manipulating and exploring datasets

In exercise 2, I calculated the mean temperature of the Earth if there was no atmosphere (which we called TE ). Next, I will be calculating the actual temperature of the Earth as recorded since 1880. The difference between the two is caused by the so-called greenhouse effect. In the next exercise, I will calculate the size of the greenhouse effect on Earth.

# if i need to find the file path first
# print(paste(file_path <- file.choose()))
# making chosen file path into an object in R environment 
# temperature <- read.csv(file_path)

# finding the file path myself in the working directory
temperature <- read.csv("QRS.NASAGISS1880-2021.csv")

# what are the dimensions of a dataset? first step after loading a dataset!
# number of records (rows) and variables (columns)
dim(temperature)

## [1] 1698    3

# summary of min, max, quartiles and mean per column
summary(temperature)

##       temp            year           doy       
##  Min.   :10.78   Min.   :1880   Min.   : 14.0  
##  1st Qu.:12.29   1st Qu.:1915   1st Qu.: 76.0  
##  Median :13.59   Median :1950   Median :167.0  
##  Mean   :13.59   Mean   :1950   Mean   :182.3  
##  3rd Qu.:14.94   3rd Qu.:1986   3rd Qu.:259.0  
##  Max.   :16.36   Max.   :2021   Max.   :350.0

# view first six and last six lines of dataset (six = default)
head(temperature)

##    temp year doy
## 1 11.41 1880  14
## 2 11.61 1880  47
## 3 12.44 1880  76
## 4 13.37 1880 105
## 5 14.38 1880 138
## 6 14.95 1880 167

tail(temperature)

##       temp year doy
## 1693 12.39 2021  14
## 1694 12.48 2021  47
## 1695 13.40 2021  76
## 1696 14.28 2021 105
## 1697 15.26 2021 138
## 1698 16.00 2021 167

# creating a subset of dataset by row indexing
index <- 1:10
index

##  [1]  1  2  3  4  5  6  7  8  9 10

# viewing the first 10 lines with the index just made -> indexed as data[rows,columns]
temperature[index, ]

##     temp year doy
## 1  11.41 1880  14
## 2  11.61 1880  47
## 3  12.44 1880  76
## 4  13.37 1880 105
## 5  14.38 1880 138
## 6  14.95 1880 167
## 7  15.25 1880 197
## 8  15.20 1880 229
## 9  14.52 1880 259
## 10 13.41 1880 288

# you could also get a small chunk indexed by rows AND columns
index1 <- 1:6
index2 <- 1:2
temperature[index1,index2]

##    temp year
## 1 11.41 1880
## 2 11.61 1880
## 3 12.44 1880
## 4 13.37 1880
## 5 14.38 1880
## 6 14.95 1880

# subset can be made using the name of the column as well e.g. for 1881 only
temperature[13:24,"temp"]

##  [1] 11.39 11.71 12.56 13.58 14.54 14.97 15.43 15.27 14.52 13.43 12.37 11.79

# trying something out instead of index exercise
# leaving this out of the summary document -> essentially shows TRUE for all years up to
# but not including 1990, and FALSE for the rest in a table format
# yearindex <- temperature$year
# yearindex < 1990

# using which function to find hottest average year
# first, printing the maximal value to the console (saving it as a separate object)
maxTemp <- max(temperature[,"temp"])
# second, applying which function to find the hottest recorded temperature
which(temperature[,"temp"] == maxTemp)

## [1] 1675

# print the record to the console (row corresponding with the hottest temperature)
temperature[which(temperature[,"temp"] == maxTemp),]

##       temp year doy
## 1675 16.36 2019 197

# or as in the Tutorial:
temperature[1675, ]

##       temp year doy
## 1675 16.36 2019 197

# another way to find maximum record using which.max (specialized function)
temperature[which.max(temperature[,"temp"]), ]

##       temp year doy
## 1675 16.36 2019 197

Exercise 3

When was the minimum global temperature recorded (day and year)?

minTempyear <- temperature[which.min(temperature[,"temp"]), ]
print(paste("The lowest temperature was recorded on day", minTempyear$doy,"of the year", minTempyear$year, "."))

## [1] "The lowest temperature was recorded on day 14 of the year 1893 ."

Loading, manipulating and exploring datasets (continued)

# obtaining subset of the temperatures recorded in the year 2000 by creating an index for the year 2000
year2000 <- temperature[,"year"] == 2000

# subsetting the data to that year
temperature[year2000, ]

##       temp year doy
## 1441 11.83 2000  14
## 1442 12.40 2000  47
## 1443 13.07 2000  76
## 1444 14.08 2000 105
## 1445 14.83 2000 138
## 1446 15.55 2000 167
## 1447 15.81 2000 197
## 1448 15.71 2000 229
## 1449 15.04 2000 259
## 1450 13.92 2000 288
## 1451 12.85 2000 321
## 1452 12.14 2000 350

# saving this into new object of all 2000 recorded temperatures
temp2000 <- temperature[year2000, ]

# looking at the new size and summary of the dataset (subset)
length(temp2000)

## [1] 3

dim(temp2000)

## [1] 12  3

summary(temp2000)

##       temp            year           doy        
##  Min.   :11.83   Min.   :2000   Min.   : 14.00  
##  1st Qu.:12.74   1st Qu.:2000   1st Qu.: 97.75  
##  Median :14.00   Median :2000   Median :182.00  
##  Mean   :13.94   Mean   :2000   Mean   :182.58  
##  3rd Qu.:15.17   3rd Qu.:2000   3rd Qu.:266.25  
##  Max.   :15.81   Max.   :2000   Max.   :350.00

# another way to create a subset is to use the specialized subset function
# making a subset of all years after 2000 (>2000 is basically the same as => 2001)
# subset function creates a subset of a larger dataset with two arguments: 
#      1. dataset -> in this case, temperature
#      2. logical test -> here, year (what we want to index by)
#      subset(dataset,logicaltest)
temp2000plus <- subset(temperature, year > 2000)
dim(temp2000plus)

## [1] 246   3

summary(temp2000plus)

##       temp            year           doy       
##  Min.   :11.88   Min.   :2001   Min.   : 14.0  
##  1st Qu.:13.08   1st Qu.:2006   1st Qu.: 76.0  
##  Median :14.30   Median :2011   Median :167.0  
##  Mean   :14.26   Mean   :2011   Mean   :180.4  
##  3rd Qu.:15.59   3rd Qu.:2016   3rd Qu.:259.0  
##  Max.   :16.36   Max.   :2021   Max.   :350.0

# only records from the year 2000 and occurring in the first half of each month of recorded
# temperatures within that year
temp2000andfirsthalfmonth <- subset(temperature, year == 2000 & doy < 16)

# **Boxplot** of subset temp2000
boxplot(temp2000[,"temp"], ylab = "Temperature", xlab = "Time")

# plotting the distribution of global temperatures for the year 2000
summaryInfo <- summary(temp2000[, "temp"])
summaryInfo

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.83   12.74   14.00   13.94   15.17   15.81

summaryInfo["Median"]

## Median 
##     14

# finding the data structure type/class of the new summaryInfo object
class(summaryInfo)

## [1] "summaryDefault" "table"

# the function abline adds lines to a plot
# a,b -> the intercept and slope, single values
# h -> the y-value(s) for horizontal line(s)
# v -> the x-values for vertical line(s)
# lty, col, lwd -> graphical parameters
?abline()
abline(h=summaryInfo, lty=2, col="red")

# ALTERNATIVE
# forcing summaryInfo values to be numeric values, with two decimal places
onlynumbersSummary <- round(as.numeric(summaryInfo), 3)
abline(h=onlynumbersSummary, lty=2, col="red")

# another very useful graph: the **Histogram**!
# Very common plot in data analysis which gives the frequencies (i.e. counts) of data 
# values that fall within certain ranges
#       1. "bin"/"bucket" (i.e. divide) the entire range of values into a series of intervals
#       2. count how many values fall into each interval
# sometimes confused with bar charts, which compares different categories
hist(temp2000[,"temp"],xlab="Temperature",main="Global Mean Temperature in 2000")

# plotting the global temperature as a function of the day of year for the year 2000
# using a Scatterplot (uses Cartesian coordinate system)
# use ?plot() for ideas!
plot(temp2000[,"doy"], temp2000[,"temp"], xlab = "Day of Year", ylab = "Temperature", 
     main = "Mean Temperature in the Year 2000", panel.first = grid(8, 8), type = "b", 
     lwd = 3, col = "blue", pch = 0, cex = 1.2, font.main = 4)

# pch=0 -> open squares (what i used)
# pch=1 -> open circle (default)
# pch=16 -> filled circle (solid dot)
# pch=2 -> triangle
# pch=3 -> plus sign
# pch=4 -> cross/X
# pch=15 -> filled square
# pch=17 -> filled triangle
# pch="*" -> asterisks
# pch="+" -> plus signs

Exercise 4

What was the mean global temperature from the start of 1880 to the end of 1945? And how much warmer (in C^o) is the Earth thanks to its atmosphere (compare the mean to T_E)?

# using indexing and conditionals for the year range (remember that R uses "&", not "and")
years1880to1945 <- temperature[,"year"] >= 1880 & temperature[,"year"] <= 1945
temp1880to1945 <- temperature[years1880to1945, ]

# or a cleaner option (also by indexing):
years1880ti1945 <- temperature$year >= 1880 & temperature$year <= 1945
temp1880to1945 <- temperature[years1880to1945, ]

# another way is by using the specialized subset function
temp1880to1945 <- subset(temperature, year >= 1880 & year <= 1945)

# finding the mean temperature for the time period 1880-1945
# two different ways!!
summary(temp1880to1945)

##       temp            year           doy        
##  Min.   :10.78   Min.   :1880   Min.   : 14.00  
##  1st Qu.:12.02   1st Qu.:1896   1st Qu.: 97.75  
##  Median :13.38   Median :1912   Median :182.00  
##  Mean   :13.33   Mean   :1912   Mean   :182.58  
##  3rd Qu.:14.70   3rd Qu.:1929   3rd Qu.:266.25  
##  Max.   :15.63   Max.   :1945   Max.   :350.00

meantemp1880to1945 <- mean(temp1880to1945[,"temp"])
meantemp1880to1945

## [1] 13.33405

# calculating the difference from T_E baseline (thanks to atmosphere!)
greenhouseeffect <- meantemp1880to1945 - T_E
greenhouseeffect

## [1] 30.66944

Loading, manipulating and exploring datasets (continued, again…)

# apply functions are a set of loop functions in R -> can be used to calculate column and row means
# the main difference is that they are more efficient than a "for loop" because they require less 
# lines of code (less chance for coding error) and are also often faster
# important: check for missing values - if necessary remove them using na.rm = TRUE, but make sure 
# to store the value without missing values in a separate object!
apply(temp2000plus, 2, mean)

##       temp       year        doy 
##   14.26167 2010.75610  180.35366

# the same can be done using the ColMeans and RowMeans commands
# even if these don't make sense for this example lol
colMeans(temp2000plus)

##       temp       year        doy 
##   14.26167 2010.75610  180.35366

# excluded for the sake of summary document: rowMeans(temp2000plus)

# using the **tapply** function to calculate the median temperature for each year after 2000
? tapply
# tapply(argument 1,argument 2, argument 3)
#      argument 1 -> the variable we are interested in applying the function to ("temp")
#      argument 2 -> the variable we want to group the temperature records by ("year")
#      argument 3 -> the function we want to use ("median")
yearlytemp1 <- tapply(temp2000plus[, "temp"], temp2000plus[, "year"], median)
yearlytemp1

##   2001   2002   2003   2004   2005   2006   2007   2008   2009   2010   2011 
## 14.090 14.140 14.220 14.185 14.295 14.165 14.250 14.185 14.210 14.360 14.235 
##   2012   2013   2014   2015   2016   2017   2018   2019   2020   2021 
## 14.335 14.195 14.380 14.500 14.575 14.500 14.530 14.590 14.585 13.840

# to plot this, the x ("year") and y ("temp") variables must first be defined
years1 <- 2001:2021
plot(years1, yearlytemp1, xlab = "Year", ylab = "Temperature", main = "Median Temperature 2001-2021", 
     col = "red", pch = 16, type = "b", cex = 1.2, font.main = 4)

# the graph shows that the median temperature appeared to have been increasing between 2001 and 2020, 
# and then sharply declined in 2021...why could this be? maybe due to the covid-19 pandemic?
# unfortunately, this may look surprising or impressive at first glance, but is not real, valid or trustworthy
# let's look closely at the temperature data for 2021 by creating a subset
subset(temp2000plus, year == 2021)

##       temp year doy
## 1693 12.39 2021  14
## 1694 12.48 2021  47
## 1695 13.40 2021  76
## 1696 14.28 2021 105
## 1697 15.26 2021 138
## 1698 16.00 2021 167

# or, alternatively by creating an index for year 2021
temp2021 <- temperature[, "year"] == 2021
temperature[temp2021,]

##       temp year doy
## 1693 12.39 2021  14
## 1694 12.48 2021  47
## 1695 13.40 2021  76
## 1696 14.28 2021 105
## 1697 15.26 2021 138
## 1698 16.00 2021 167

# if it's not clear, let's make it even clearer by using tapply for the length of records per year
# this will show the amount of temperature entries for each year in the subset
tapply(temp2000plus[,"temp"], temp2000plus[,"year"], length)

## 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 
##   12   12   12   12   12   12   12   12   12   12   12   12   12   12   12   12 
## 2017 2018 2019 2020 2021 
##   12   12   12   12    6

# we can see that year 2021 has half the entries of all other years, so really what we are capturing is 
# seasonality! the temperatures appear to decline in 2021 given no records of summer temperatures
barplot(tapply(temp2000plus[,"temp"], temp2000plus[,"year"], length))

# the year 2021 must be removed from the data subset to make the median temperatures comparable
temp2000pluswithout2021 <- subset(temp2000plus, year >= 2000 & year < 2021)
yearlytemp2 <- tapply(temp2000pluswithout2021[, "temp"], temp2000pluswithout2021[, "year"], median)
yearlytemp2

##   2001   2002   2003   2004   2005   2006   2007   2008   2009   2010   2011 
## 14.090 14.140 14.220 14.185 14.295 14.165 14.250 14.185 14.210 14.360 14.235 
##   2012   2013   2014   2015   2016   2017   2018   2019   2020 
## 14.335 14.195 14.380 14.500 14.575 14.500 14.530 14.590 14.585

# and let's plot it again without the year 2021 as well, using the same style and color
years2 <- 2001:2020
plot(years2, yearlytemp2, xlab = "Year", ylab = "Temperature", main = "Median Temperature 2001-2020", 
     col = "red", pch = 16, type = "b", cex = 1.2, font.main = 4)

# to calculate the exact difference (increase) of the global median temperature in the period 2001-2020
yearlytemp2[20]-yearlytemp2[1]

##  2020 
## 0.495

# then we look at the change over time
(yearlytemp2[20]-yearlytemp2[1])/length(yearlytemp2)

##    2020 
## 0.02475

# this implies the median temperature on Earth has been increasing by 0.02475 degrees Celsius per year!

Exercise 5

What was the mean global temperature increase from the start of 1880 to the end of 2020?
How much did the Earth’s temperature increase on average per year?
How does this increase compare to the maximum increase agreed on at the Paris Climate Accords?
And how soon will the Earth pass that threshold at the current observed rate?

# not sure if the question refers to calculating the difference between the start of 1880 and end of 2020
# or the difference between the global mean temperature in 1880 compared to 2020. i will do both
# first, i'll compare the temperatures in the first half of 1880 to those in the second half of 2020
temp1880firsthalf <- subset(temperature, year == 1880 & doy < 184)
temp2020secondhalf <- subset(temperature, year == 2020 & doy > 183)
temp1 <- mean(temp1880firsthalf[, "temp"])
temp2 <- mean(temp2020secondhalf[, "temp"])
tempdifferencemethod1 <- temp2 - temp1
tempdifferencemethod1

## [1] 1.798333

# second, i'll compare the difference in global mean temperature between 1880 and 2020
temp1880to2020 <- subset(temperature, year < 2021)
yearlytempAll <- tapply(temp1880to2020[, "temp"], temp1880to2020[, "year"], mean)
length(yearlytempAll)

## [1] 141

tempdifferencemethod2 <- yearlytempAll[141]-yearlytempAll[1]
# or
tempdifferencemethod2 <- yearlytempAll["2020"]-yearlytempAll["1880"]
tempdifferencemethod2

##   2020 
## 1.1775

# for climate change analysis, Method 2 (1.1775°C) is correct, as climate scientists always use annual
# averages when measuring long-term temperature trends - it eliminates seasonal bias
# furthermore, the ~1.2°C increase aligns with documented global warming over the past 140 years
years3 <- 1880:2020
plot(years3, yearlytempAll, xlab = "Year", ylab = "Temperature", main = "Mean Temperature 1880-2020", 
     col = "blue", pch = 16, type = "o", cex = 0.6, font.main = 4)

# to calculate the exact difference (increase) of the global mean temperature in the period 1880-2020
length(yearlytempAll)

## [1] 141

yearlytempAll[141]-yearlytempAll[1] # tempdifferencemethod2

##   2020 
## 1.1775

# but i want to calculate the yearly change over time
annual_warming_rate <- (yearlytempAll[141]-yearlytempAll[1])/length(yearlytempAll)

# how does this compare to the maximum increase agreed at the Paris Climate Accords?
# essentially, how does it compare to the goal of limiting global warming to 1.5 degrees Celsius?
remaining_warming <- 1.5 - tempdifferencemethod2

# we're not quite over the limit yet it seems (though this is data up to and including 2020 only!)
# how soon will the Earth pass the 1.5 degrees Celsius threshold at the observed rate?
years_to_threshold1 <- remaining_warming / annual_warming_rate
threshold_year1 <- 2020 + years_to_threshold1
threshold_year1

##     2020 
## 2058.618

# what about 2 degrees Celsius? 
remaining_warming_2C <- 2 - tempdifferencemethod2
years_to_threshold2 <- remaining_warming_2C / annual_warming_rate
threshold_year2 <- 2020 + years_to_threshold2
threshold_year2

##    2020 
## 2118.49

Introduction to R: Tutorials 3, 4 & 5

Lucia Causape Bueno

2025-09-07