Dataset 1: Annual estimates of mean surface temperature change measured with respect to a baseline climatology
Analysis: I cleaned the data, reshaped it from wide to long (tidy) form, and calculated the mean temperature change for each country.
Conclusion: Based on this analysis, Luxembourg, Estonia, Serbia, Belgium, and Latvia may be experiencing a more pronounced impact in terms of temperature change.
library(dplyr,warn.conflicts = FALSE)
library(tidyr,warn.conflicts = FALSE)
library(readr,warn.conflicts = FALSE)
# Read the Climate_Indicators_Annual_Mean_Global_Surface_Temperature CSV
# straight from GitHub. Strings are kept as character (not factor) columns.
load <- read.csv(
  file = "https://raw.githubusercontent.com/datanerddhanya/DATA607/main/Indicator_3_1_Climate_Indicators_Annual_Mean_Global_Surface_Temperature_577579683071085080.csv",
  sep = ",",
  quote = "\"",
  stringsAsFactors = FALSE
)
# read.csv() already returns a data.frame, so no further conversion is needed.
# The data is wide because the years are spread across columns. Tidy it by
# moving every column after the ObjectId:CTS.Full.Descriptor range into rows:
# the former column name goes to `Year`, the cell value goes to
# `Mean.Surface.Temp.change`, and rows with a missing value are dropped.
load_pivot <- pivot_longer(
  load,
  cols = !(ObjectId:CTS.Full.Descriptor),
  names_to = "Year",
  values_to = "Mean.Surface.Temp.change",
  values_drop_na = TRUE
)
# The Year labels carry an "X" that has to be removed. Splitting on "X"
# puts the text before it into `Blank` and the text after it into `Year`.
load_clean <- separate_wider_delim(
  load_pivot,
  cols = Year,
  delim = "X",
  names = c("Blank", "Year")
)
# Clean up a country name by keeping only the text before the first comma.
#
# x: character vector of country names (other types are coerced with
#    as.character()). May have any length, including zero.
# Returns a character vector the same length as `x`: each element is cut at
# its first comma (if it has one) and stripped of surrounding whitespace.
# NA elements stay NA.
extract_value <- function(x) {
  x <- as.character(x)
  # Position of the first literal comma: -1 when absent, NA for NA input.
  comma_pos <- regexpr(",", x, fixed = TRUE)
  has_comma <- !is.na(comma_pos) & comma_pos > 0
  x[has_comma] <- substr(x[has_comma], 1L, comma_pos[has_comma] - 1L)
  trimws(x)
}
# Apply the function to the entire column. vapply() guarantees a character
# result of the same length, and USE.NAMES = FALSE stops the raw country
# strings from being attached to the cleaned values as element names.
load_clean$Country <- vapply(
  load_clean$Country,
  extract_value,
  character(1),
  USE.NAMES = FALSE
)
# Create a new data frame that keeps only the columns needed for the analysis.
load_final <- select(
  load_clean,
  all_of(c("ObjectId", "Country", "ISO3", "Unit", "Year", "Mean.Surface.Temp.change"))
)
# transform(load_final, Mean.Surface.Temp.change = as.numeric(Mean.Surface.Temp.change))
# load_final$Mean.Surface.Temp.change = as.numeric(load_final$Mean.Surface.Temp.change)
# Average the temperature change over all years for each country, then rank
# the countries from the largest mean change to the smallest.
# NOTE(review): Country was cut at its first comma earlier in this script, so
# any distinct ISO3 entities whose names share the text before the comma are
# averaged together here -- confirm whether grouping by ISO3 is needed.
load_climate_change <- load_final |>
  group_by(Country) |>
  summarise(Mean.Surface.Temp.change = mean(Mean.Surface.Temp.change)) |>
  arrange(desc(Mean.Surface.Temp.change))

# Preview the countries with the largest mean change.
head(load_climate_change)
## # A tibble: 6 × 2
## Country Mean.Surface.Temp.change
## <chr> <dbl>
## 1 Luxembourg 1.58
## 2 Estonia 1.56
## 3 Serbia 1.54
## 4 Belgium 1.53
## 5 Latvia 1.51
## 6 Belarus 1.51
# Overall centre and spread of the per-country mean temperature changes.
# NOTE(review): the results are stored under the names `mean` and `sd`, the
# same names as the base/stats functions. Calls such as mean(x) still resolve
# to the functions, but renaming these variables would be clearer.
mean <- mean(load_climate_change[["Mean.Surface.Temp.change"]])
sd <- sd(load_climate_change[["Mean.Surface.Temp.change"]])
# mean
head(mean)
## [1] 0.5820715
# standard deviation
head(sd)
## [1] 0.3223647