Abstract: Data includes the yearlyaverage change of temperature in celsius for each country. We will change the results to Farenheit and find which country has the bigger temperature change on average. Do they follow a normal distribution? How does it compare with US?

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)

climate <- read.csv("https://raw.githubusercontent.com/Angelogallardo05/Week6/main/Indicator_3_1_Climate_Indicators_Annual_Mean_Global_Surface_Temperature_577579683071085080.csv")
head(climate)
##   ObjectId                      Country ISO2 ISO3
## 1        1 Afghanistan, Islamic Rep. of   AF  AFG
## 2        2                      Albania   AL  ALB
## 3        3                      Algeria   DZ  DZA
## 4        4               American Samoa   AS  ASM
## 5        5     Andorra, Principality of   AD  AND
## 6        6                       Angola   AO  AGO
##                                                                                          Indicator
## 1 Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980
## 2 Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980
## 3 Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980
## 4 Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980
## 5 Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980
## 6 Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980
##             Unit
## 1 Degree Celsius
## 2 Degree Celsius
## 3 Degree Celsius
## 4 Degree Celsius
## 5 Degree Celsius
## 6 Degree Celsius
##                                                                                                                                                                                                                                                Source
## 1 Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from: https://www.fao.org/faostat/en/#data/ET. Accessed on 2023-03-28.
## 2 Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from: https://www.fao.org/faostat/en/#data/ET. Accessed on 2023-03-28.
## 3 Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from: https://www.fao.org/faostat/en/#data/ET. Accessed on 2023-03-28.
## 4 Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from: https://www.fao.org/faostat/en/#data/ET. Accessed on 2023-03-28.
## 5 Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from: https://www.fao.org/faostat/en/#data/ET. Accessed on 2023-03-28.
## 6 Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from: https://www.fao.org/faostat/en/#data/ET. Accessed on 2023-03-28.
##   CTS.Code                   CTS.Name
## 1     ECCS Surface Temperature Change
## 2     ECCS Surface Temperature Change
## 3     ECCS Surface Temperature Change
## 4     ECCS Surface Temperature Change
## 5     ECCS Surface Temperature Change
## 6     ECCS Surface Temperature Change
##                                                           CTS.Full.Descriptor
## 1 Environment, Climate Change, Climate Indicators, Surface Temperature Change
## 2 Environment, Climate Change, Climate Indicators, Surface Temperature Change
## 3 Environment, Climate Change, Climate Indicators, Surface Temperature Change
## 4 Environment, Climate Change, Climate Indicators, Surface Temperature Change
## 5 Environment, Climate Change, Climate Indicators, Surface Temperature Change
## 6 Environment, Climate Change, Climate Indicators, Surface Temperature Change
##    X1961  X1962  X1963  X1964  X1965 X1966  X1967  X1968  X1969  X1970  X1971
## 1 -0.113 -0.164  0.847 -0.764 -0.244 0.226 -0.371 -0.423 -0.539  0.813  0.619
## 2  0.627  0.326  0.075 -0.166 -0.388 0.559 -0.074  0.081 -0.013 -0.106 -0.195
## 3  0.164  0.114  0.077  0.250 -0.100 0.433 -0.026 -0.067  0.291  0.116 -0.385
## 4  0.079 -0.042  0.169 -0.140 -0.562 0.181 -0.368 -0.187  0.132 -0.047 -0.477
## 5  0.736  0.112 -0.752  0.308 -0.490 0.415  0.637  0.018 -0.137  0.121 -0.326
## 6  0.041 -0.152 -0.190 -0.229 -0.196 0.175 -0.081 -0.193  0.188  0.248 -0.097
##    X1972  X1973  X1974  X1975  X1976  X1977  X1978 X1979  X1980  X1981  X1982
## 1 -1.124  0.232 -0.489 -0.445 -0.286  0.513  0.129 0.361  0.600  0.483 -0.346
## 2 -0.069 -0.288 -0.139 -0.211 -0.683  0.545 -0.814 0.203 -0.414 -0.351  0.173
## 3 -0.348 -0.015 -0.503 -0.539 -0.782  0.504  0.012 0.654  0.232  0.215  0.399
## 4 -0.067  0.330 -0.308 -0.118 -0.177  0.156  0.092 0.341  0.350  0.179  0.280
## 5 -0.499  0.025 -0.371  0.246 -0.045 -0.093 -0.163 0.058 -0.188  0.178  1.044
## 6 -0.035  0.475 -0.158 -0.029 -0.313  0.272  0.037 0.291  0.279 -0.071  0.164
##    X1983  X1984  X1985  X1986  X1987 X1988  X1989 X1990  X1991  X1992  X1993
## 1  0.164  0.145  0.283 -0.141  0.391 0.919 -0.205 0.730 -0.168 -0.294  0.220
## 2 -0.128 -0.270 -0.103  0.569 -0.106 0.370 -0.066 0.795 -0.269  0.106  0.076
## 3  0.560 -0.004  0.508  0.296  0.975 1.304  0.386 1.266  0.031 -0.312  0.552
## 4  0.313  0.277  0.256  0.394  0.354 0.509  0.143 0.497  0.641  0.344 -0.069
## 5  0.859 -0.157  0.059  0.387  0.397 0.883  1.162 1.736  0.231  0.386  0.174
## 6  0.487  0.631  0.694  0.176  0.689 0.572 -0.055 0.687  0.341  0.466  0.256
##   X1994  X1995  X1996 X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005
## 1 0.430  0.359 -0.116 0.471 0.675 1.198 0.993 1.311 1.365 0.587 1.373 0.401
## 2 1.330 -0.172 -0.038 0.075 0.795 0.670 1.065 1.532 0.492 0.970 0.444 0.189
## 3 0.732  0.595  0.846 1.059 1.109 1.476 0.820 1.856 1.258 1.585 0.988 1.264
## 4 0.189  0.755  0.784    NA    NA 0.242 0.626 0.904 1.152 0.716 0.191 0.801
## 5 1.508  1.279  0.570 1.788 1.018 1.055 1.050 1.480 0.835 1.949 0.936 0.851
## 6 0.212  0.753  0.370 0.107 1.064 0.417 0.169 0.295 0.735 0.889 0.414 1.021
##   X2006 X2007 X2008 X2009 X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018
## 1 1.720 0.675 0.704 0.895 1.613 1.397 0.223 1.281 0.456 1.093 1.555 1.540 1.544
## 2 0.345 1.316 0.978 0.910 1.191 1.055 1.487 1.333 1.198 1.569 1.464 1.121 2.028
## 3 1.395 1.220 1.185 0.945 2.265 1.398 1.147 1.192 1.690 1.121 1.757 1.512 1.210
## 4 0.403 1.032 0.670    NA 1.311 0.854 0.924 1.257 1.170 1.009 1.539 1.435 1.189
## 5 1.485 1.024 0.946 1.413 0.471 1.677 1.265 0.831 1.946 1.690 1.990 1.925 1.919
## 6 0.561 0.885 0.501 0.708 1.194 0.880 0.552 1.044 0.828 1.331 1.609 0.870 1.395
##   X2019 X2020 X2021 X2022
## 1 0.910 0.498 1.327 2.012
## 2 1.675 1.498 1.536 1.518
## 3 1.115 1.926 2.330 1.688
## 4 1.539 1.430 1.268 1.256
## 5 1.964 2.562 1.533 3.243
## 6 1.752 1.162 1.553 1.212

remove duplicate and unecessary columns

climate <- climate[, -c(4, 5, 7, 8, 9)]

remove the X in the names of the Year’s columns

names(climate)[6:ncol(climate)] <- gsub("^X", "", names(climate)[6:ncol(climate)])

Remove rows with NA

climate_clean <- na.omit(climate)

#Add “World” to the ISO2 column , resulting in no blanks for that column

climate_clean$ISO2[155] <- "World"
climate_clean <- climate_clean %>%
  mutate(across(6:last_col(), ~ . * 9/5))

change the column description from degree celsius to Degree Farenheit

climate_clean <- climate_clean %>%
  mutate(Unit = if_else(Unit == "Degree Celsius", "Degree Fahrenheit", Unit))
climate_clean <- climate_clean %>%
  mutate(average_temp_change = rowMeans(select(., 6:last_col()), na.rm = TRUE))

#Sort by highest to lowest average temperature change by country

sorted_data <- climate_clean %>%
  arrange(desc(average_temp_change))

analyze mongolia, normal distribution of temperatur change, has a normal distribution

mongolia_data <- climate_clean %>% 
  filter(Country == "Mongolia") 

# Extract temperature change values
temperature_change <- unlist(mongolia_data[, 6:ncol(mongolia_data)], use.names = FALSE)

# Calculate mean and standard deviation
mean_temp_change <- mean(temperature_change, na.rm = TRUE)
sd_temp_change <- sd(temperature_change, na.rm = TRUE)

# Generate simulated normal distribution
simulated_data_mongolia <- rnorm(n = 1000, mean = mean_temp_change, sd = sd_temp_change)

# Create histogram
ggplot() +
  geom_histogram(aes(x = simulated_data_mongolia), binwidth = 0.05, fill = "skyblue", color = "black") +
  labs(title = "Simulated Normal Distribution of Temperature Change for Mongolia",
       x = "Temperature Change",
       y = "Frequency")

quartiles <- quantile(simulated_data_mongolia, probs = c(0.25, 0.5, 0.75))

# Print quartiles
print(quartiles)
##       25%       50%       75% 
## 0.5076652 1.5849147 2.5580434

repeat for US, also a normal distribution for temperature change

US temperature changes quite lower than Mongolia. It has a more normal distribution with a shorter range.

us_data <- climate_clean %>% 
  filter(Country == "United States") 

# Extract temperature change values
temperature_change <- unlist(us_data[, 6:ncol(us_data)], use.names = FALSE)

# Calculate mean and standard deviation
usmean_temp_change <- mean(temperature_change, na.rm = TRUE)
ussd_temp_change <- sd(temperature_change, na.rm = TRUE)

# Generate simulated normal distribution
simulated_data_us <- rnorm(n = 1000, mean = usmean_temp_change, sd = ussd_temp_change)

# Create histogram
ggplot() +
  geom_histogram(aes(x = simulated_data_us), binwidth = 0.05, fill = "skyblue", color = "black") +
  labs(title = "Simulated Normal Distribution of Temperature Change for Mongolia",
       x = "Temperature Change",
       y = "Frequency")

quartiles <- quantile(simulated_data_us, probs = c(0.25, 0.5, 0.75))

# Print quartiles
print(quartiles)
##       25%       50%       75% 
## 0.1937738 0.8820742 1.6075091