Este é um notebook R Markdown. Os dados obtidos estão disponíveis em: https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data

Pacotes necessários

require(ggplot2)
require(dplyr)
require(stringr)

Carga de dados

# Load the per-city temperature records from the local CSV export and
# preview the first rows.
caminho_csv <- "../data/GlobalClimate/GlobalLandTemperaturesByCity.csv"
df <- read.csv(file = caminho_csv)
head(df)

Remoção de medidas nulas

# Keep only the rows that carry a temperature measurement.
df <- filter(df, !is.na(AverageTemperature))
head(df)

Preparação de dados formatando datas e valores numéricos

# Parse `dt` into Date values, then derive the calendar year (`Ano`) as a
# number so that rows can later be grouped and filtered by year.
df <- df %>%
  mutate(
    dt = as.Date(dt),
    Ano = as.numeric(format(dt, format = "%Y"))
  )
head(df)

Obtenção de dados consolidados anualmente

# Average the measurements of each city within each year. The result has the
# columns `Ano`, `City` and `AverageTemperature`, in that order.
# NOTE(review): the grouping key is the city name only; if two countries share
# a city name their measurements would be averaged together -- confirm whether
# `Country` should be part of the key.
df_anual <- aggregate(
  AverageTemperature ~ Ano + City,
  data = df,
  FUN = mean
)
head(df)
head(df_anual)

Filtrando períodos de interesse

# Keep only the annual means for 2013.
df_um <- filter(df_anual, Ano == 2013)
head(df_um)

Aplicando um join de tabelas para obtenção dos dados necessários

# Attach country and coordinates to each city's 2013 annual mean.
# `df` holds one row per city per measurement date, so joining against it
# directly repeats every annual row once for each measurement of that year.
# Reducing `df` to its distinct location attributes first keeps a single row
# per city location, and there is no duplicated `AverageTemperature` column
# left to drop or rename by position afterwards.
locais_um <- df %>%
  distinct(City, Ano, Country, Latitude, Longitude)
df_um <- left_join(df_um, locais_um, by = c("City", "Ano"))
head(df_um)

Tratamento de coordenadas

# Convert the textual coordinates (e.g. "42.59N", "8.73W") into signed
# decimal degrees.
# Magnitude: everything except the trailing hemisphere letter (a negative end
# position in str_sub() counts from the end of the string).
df_um$Latitude_num <- as.numeric(str_sub(df_um$Latitude, 1, -2))
df_um$Longitude_num <- as.numeric(str_sub(df_um$Longitude, 1, -2))
# Hemisphere letter. The class is spelled `[A-Za-z]` because a single `A-z`
# range would also match the punctuation characters that sit between `Z` and
# `a` in the character table.
df_um$Latitude_chr <- str_extract(df_um$Latitude, "[A-Za-z]+")
df_um$Longitude_chr <- str_extract(df_um$Longitude, "[A-Za-z]+")
# Latitudes marked "N" and longitudes marked "E" stay positive; every other
# value is negated.
df_um <- within(df_um, {
  Lat <- ifelse(Latitude_chr == "N", Latitude_num, -Latitude_num)
  Long <- ifelse(Longitude_chr == "E", Longitude_num, -Longitude_num)
})
head(df_um)

Plotando o mapa de temperaturas

# World outline used as the base layer here and reused by the later maps.
mapamundial <- map_data("world")

# Draw each city's 2013 annual mean temperature on top of the world map.
# The points take their columns from `data = df_um` rather than from
# `df_um$...` vectors inside aes(), which ggplot2 discourages.
# There is no separate geom_jitter() layer: without data or aesthetics of its
# own such a layer draws nothing.
ggplot() +
  geom_polygon(
    data = mapamundial,
    aes(x = long, y = lat, group = group),
    fill = "lightgray",
    colour = "white"
  ) +
  coord_fixed(1.3) +
  geom_point(
    data = df_um,
    aes(x = Long, y = Lat, color = AverageTemperature)
  ) +
  labs(x = " ", y = " ", subtitle = "2013") +
  theme_minimal() +
  # `limits` is spelled out in full instead of relying on partial argument
  # matching; the same fixed range is used for the 1913 map so the colours
  # are comparable.
  scale_color_distiller(
    "Temperatura Atmosférica",
    palette = "Spectral",
    limits = c(-32, 40)
  )

# Build the 1913 counterpart of `df_um`: annual means for 1913 together with
# country, coordinates and signed decimal-degree columns.
df_dois <- df_anual %>% filter(Ano == 1913)

# `df` holds one row per city per measurement date, so it is reduced to its
# distinct location attributes before joining; joining against `df` directly
# would repeat every annual row once per measurement of that year.
locais_dois <- df %>%
  distinct(City, Ano, Country, Latitude, Longitude)
df_dois <- left_join(df_dois, locais_dois, by = c("City", "Ano"))

# Magnitude: everything except the trailing hemisphere letter (a negative end
# position in str_sub() counts from the end of the string).
df_dois$Latitude_num <- as.numeric(str_sub(df_dois$Latitude, 1, -2))
df_dois$Longitude_num <- as.numeric(str_sub(df_dois$Longitude, 1, -2))
# Hemisphere letter. `[A-Za-z]` is used because a single `A-z` range would
# also match the punctuation characters between `Z` and `a`.
df_dois$Latitude_chr <- str_extract(df_dois$Latitude, "[A-Za-z]+")
df_dois$Longitude_chr <- str_extract(df_dois$Longitude, "[A-Za-z]+")
# Latitudes marked "N" and longitudes marked "E" stay positive; every other
# value is negated.
df_dois <- within(df_dois, {
  Lat <- ifelse(Latitude_chr == "N", Latitude_num, -Latitude_num)
  Long <- ifelse(Longitude_chr == "E", Longitude_num, -Longitude_num)
})

# Draw each city's 1913 annual mean temperature on top of the world map.
# The points take their columns from `data = df_dois` rather than from
# `df_dois$...` vectors inside aes(), which ggplot2 discourages.
# There is no separate geom_jitter() layer: without data or aesthetics of its
# own such a layer draws nothing.
ggplot() +
  geom_polygon(
    data = mapamundial,
    aes(x = long, y = lat, group = group),
    fill = "lightgray",
    colour = "white"
  ) +
  coord_fixed(1.3) +
  geom_point(
    data = df_dois,
    aes(x = Long, y = Lat, color = AverageTemperature)
  ) +
  labs(x = " ", y = " ", subtitle = "1913") +
  theme_minimal() +
  # `limits` is spelled out in full instead of relying on partial argument
  # matching; the same fixed range is used for the 2013 map so the colours
  # are comparable.
  scale_color_distiller(
    "Temperatura Atmosférica",
    palette = "Spectral",
    limits = c(-32, 40)
  )

Reunião dos dois datasets para obtenção da diferença entre as médias em um século.

# Combine the 2013 and 1913 tables by city and compute how much the annual
# mean changed between them (2013 minus 1913).
# Columns that vary per measurement are dropped and the rows de-duplicated
# before merging; otherwise every 2013 row of a city is paired with every
# 1913 row of the same city and the merged table grows multiplicatively.
colunas_mensais <- c("dt", "AverageTemperatureUncertainty")
um_por_cidade <- unique(df_um[setdiff(names(df_um), colunas_mensais)])
dois_por_cidade <- unique(df_dois[setdiff(names(df_dois), colunas_mensais)])

# all.x = TRUE keeps cities that have no 1913 record; their difference is NA.
dfcomparativo <- merge(
  um_por_cidade,
  dois_por_cidade,
  by = "City",
  all.x = TRUE
)
dfcomparativo$DiferencaTemperaturaMedia <-
  dfcomparativo$AverageTemperature.x - dfcomparativo$AverageTemperature.y
glimpse(dfcomparativo)
## Rows: 355,788
## Columns: 28
## $ City                            <chr> "A Coruña", "A Coruña", "A Coruña", "A…
## $ Ano.x                           <dbl> 2013, 2013, 2013, 2013, 2013, 2013, 20…
## $ AverageTemperature.x            <dbl> 13.98375, 13.98375, 13.98375, 13.98375…
## $ dt.x                            <date> 2013-01-01, 2013-01-01, 2013-01-01, 2…
## $ AverageTemperatureUncertainty.x <dbl> 0.377, 0.377, 0.377, 0.377, 0.377, 0.3…
## $ Country.x                       <chr> "Spain", "Spain", "Spain", "Spain", "S…
## $ Latitude.x                      <chr> "42.59N", "42.59N", "42.59N", "42.59N"…
## $ Longitude.x                     <chr> "8.73W", "8.73W", "8.73W", "8.73W", "8…
## $ Latitude_num.x                  <dbl> 42.59, 42.59, 42.59, 42.59, 42.59, 42.…
## $ Longitude_num.x                 <dbl> 8.73, 8.73, 8.73, 8.73, 8.73, 8.73, 8.…
## $ Latitude_chr.x                  <chr> "N", "N", "N", "N", "N", "N", "N", "N"…
## $ Longitude_chr.x                 <chr> "W", "W", "W", "W", "W", "W", "W", "W"…
## $ Long.x                          <dbl> -8.73, -8.73, -8.73, -8.73, -8.73, -8.…
## $ Lat.x                           <dbl> 42.59, 42.59, 42.59, 42.59, 42.59, 42.…
## $ Ano.y                           <dbl> 1913, 1913, 1913, 1913, 1913, 1913, 19…
## $ AverageTemperature.y            <dbl> 12.96475, 12.96475, 12.96475, 12.96475…
## $ dt.y                            <date> 1913-01-01, 1913-02-01, 1913-03-01, 1…
## $ AverageTemperatureUncertainty.y <dbl> 0.598, 0.652, 0.676, 0.745, 0.604, 0.5…
## $ Country.y                       <chr> "Spain", "Spain", "Spain", "Spain", "S…
## $ Latitude.y                      <chr> "42.59N", "42.59N", "42.59N", "42.59N"…
## $ Longitude.y                     <chr> "8.73W", "8.73W", "8.73W", "8.73W", "8…
## $ Latitude_num.y                  <dbl> 42.59, 42.59, 42.59, 42.59, 42.59, 42.…
## $ Longitude_num.y                 <dbl> 8.73, 8.73, 8.73, 8.73, 8.73, 8.73, 8.…
## $ Latitude_chr.y                  <chr> "N", "N", "N", "N", "N", "N", "N", "N"…
## $ Longitude_chr.y                 <chr> "W", "W", "W", "W", "W", "W", "W", "W"…
## $ Long.y                          <dbl> -8.73, -8.73, -8.73, -8.73, -8.73, -8.…
## $ Lat.y                           <dbl> 42.59, 42.59, 42.59, 42.59, 42.59, 42.…
## $ DiferencaTemperaturaMedia       <dbl> 1.019, 1.019, 1.019, 1.019, 1.019, 1.0…

Diferença mínima

# Smallest change in annual mean. `dfcomparativo` is built with
# merge(all.x = TRUE), so cities without a 1913 record carry NA; those are
# skipped instead of turning the whole result into NA.
min(dfcomparativo$DiferencaTemperaturaMedia, na.rm = TRUE)
## [1] -1.443708

Diferença máxima

# Largest change in annual mean. `dfcomparativo` is built with
# merge(all.x = TRUE), so cities without a 1913 record carry NA; those are
# skipped instead of turning the whole result into NA.
max(dfcomparativo$DiferencaTemperaturaMedia, na.rm = TRUE)
## [1] 4.419583
# Draw, for each city, the change in annual mean temperature between 1913 and
# 2013 on top of the world map, positioned at the 2013 coordinates.
# The points take their columns from `data = dfcomparativo` rather than from
# `dfcomparativo$...` vectors inside aes(), which ggplot2 discourages.
# There is no separate geom_jitter() layer: without data or aesthetics of its
# own such a layer draws nothing.
ggplot() +
  geom_polygon(
    data = mapamundial,
    aes(x = long, y = lat, group = group),
    fill = "lightgray",
    colour = "white"
  ) +
  coord_fixed(1.3) +
  geom_point(
    data = dfcomparativo,
    aes(x = Long.x, y = Lat.x, color = DiferencaTemperaturaMedia)
  ) +
  labs(x = " ", y = " ", subtitle = "2013 - 1913") +
  theme_minimal() +
  scale_color_distiller(
    "Diferença na Temperatura Atmosférica",
    palette = "Spectral"
  )