#Importing the necessary packages
library(readr)
package 'readr' was built under R version 3.5.3
library(knitr)
package 'knitr' was built under R version 3.5.3
library(readxl)
package 'readxl' was built under R version 3.5.3
library(zoo)
package 'zoo' was built under R version 3.5.3
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
The data analysed in this assignment are the average monthly temperatures in Malaysia between the years 1991 and 2016.
Source: https://climateknowledgeportal.worldbank.org/download-data
# 1. Read the temperature CSV file located in the working directory.
temp <- read_csv(file = "tas_1991_2016_MYS.csv")
Parsed with column specification:
cols(
`Temperature - (Celsius)` = col_double(),
Year = col_double(),
Month = col_character(),
Country = col_character(),
ISO3 = col_character()
)
# 2. Preview the first rows and the column names of the raw import.
head(temp)
# 3. Convert three-letter month abbreviations ("Jan", "Feb", ...) to 1-12.
temp$Month <- match(temp$Month, month.abb)
# match() returns NA for any value that is not in month.abb; stop here rather
# than letting NA months flow silently into the MonthYear column.
if (anyNA(temp$Month)) {
  stop("Month column contains values that are not in month.abb", call. = FALSE)
}
# 4. Combine the year and month columns into a new MonthYear column.
# paste() already separates its arguments with a single space, which is what
# the "%Y %m" format expects.
temp$MonthYear <- as.yearmon(paste(temp$Year, temp$Month), "%Y %m")
# 5./6. Keep only the columns needed for presentation, MonthYear first.
# Selecting by name instead of by position keeps this correct even if the
# column order of the source file changes.
temp <- temp[, c("MonthYear", "Temperature - (Celsius)")]
# 7. Review the reshaped data.
head(temp)
# 8. Store the result as a plain data frame with a syntactic column name.
temp1 <- data.frame(
  MonthYear = temp$MonthYear,
  Temperature = temp$`Temperature - (Celsius)`
)
# NOTE(review): saveRDS() writes a single object in RDS format, so this file
# has to be read back with readRDS(); load() will not work on it despite the
# ".RData" extension. Consider renaming to "temp1.rds" once readers are known.
saveRDS(temp1, file = "temp1.RData")
# Checking the structure of the data: dimensions, column names and column types.
str(temp1)
'data.frame': 312 obs. of 2 variables:
$ MonthYear : 'yearmon' num Jan 1991 Feb 1991 Mar 1991 Apr 1991 ...
$ Temperature: num 25.4 25.3 25.8 26 26.1 ...
# Checking the classes of the variables.
# class() reports the S3 class and typeof() the underlying storage type, so
# the MonthYear column shows up as class "yearmon" stored as a double.
class(temp1$MonthYear)
[1] "yearmon"
typeof(temp1$MonthYear)
[1] "double"
class(temp1$Temperature)
[1] "numeric"
typeof(temp1$Temperature)
[1] "double"
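After subsetting the first 10 observations and passing them to `matrix()`, the result is a 2 x 1 matrix of mode list rather than a 10 x 2 matrix. This is because `matrix()` treats a data frame as a list of its columns, so each of the 2 variables in the dataset becomes a single list element; `as.matrix()` would be needed to obtain a 10 x 2 matrix.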
# Keep only the first 10 rows of the data frame.
temp2 <- temp1[seq_len(10), ]
head(temp2)
# Pass the subset to matrix().
# NOTE(review): matrix() treats a data frame as a list of its columns, so the
# result is a 2 x 1 list-matrix; as.matrix(temp2) would give a 10 x 2 matrix.
tempmatrix <- matrix(data = temp2)
# Inspect the structure of the result.
str(tempmatrix)
List of 2
$ : num [1:10] 1991 1991 1991 1991 1991 ...
$ : num [1:10] 25.4 25.3 25.8 26 26.1 ...
- attr(*, "dim")= int [1:2] 2 1
# Subset the first and last variable of the dataset.
# The last column is looked up with ncol() instead of being hard-coded as 2,
# so the selection still means "first and last" if temp2 gains more columns;
# drop = FALSE keeps the result a data frame.
temp3 <- temp2[, c(1L, ncol(temp2)), drop = FALSE]
head(temp3)
# Save the dataset in RData format (restore it with load()).
save(temp3, file = "temp3.RData")
# Define an ordinal speed category and the matching numeric speeds.
# The level order is given explicitly so that it is not sorted alphabetically.
G <- ordered(
  c("Very Fast", "Fast", "Normal", "Slow"),
  levels = c("Very Fast", "Fast", "Normal", "Slow")
)
M <- c(120, 105, 80, 30)
# Combine both vectors into a data frame, one row per category.
data <- data.frame(Speed = M, Level = G)
head(data)
# Show the structure of the data frame, including the ordered factor.
str(data)
'data.frame': 4 obs. of 2 variables:
$ Speed: num 120 105 80 30
$ Level: Ord.factor w/ 4 levels "Very Fast"<"Fast"<..: 1 2 3 4
# List the levels of the ordered factor in their declared order.
levels(data$Level)
[1] "Very Fast" "Fast" "Normal" "Slow"
# Define a new numeric vector with one value per row of `data`.
num <- c(7, 15, 4, 9)
# Append it to the data frame as a third column named "num".
data1 <- cbind(data, num = num)
head(data1)
# Inspect the structure of the extended data frame.
str(data1)
'data.frame': 4 obs. of 3 variables:
$ Speed: num 120 105 80 30
$ Level: Ord.factor w/ 4 levels "Very Fast"<"Fast"<..: 1 2 3 4
$ num : num 7 15 4 9
# Class of each column of the extended data frame.
class(data1$Speed)
[1] "numeric"
# An ordered factor carries both the "ordered" and the "factor" class.
class(data1$Level)
[1] "ordered" "factor"
class(data1$num)
[1] "numeric"