Este é um notebook R Markdown. Os dados obtidos estão disponíveis em: https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data
Pacotes necessários
# Attach the packages used by the notebook. library() stops with an error
# when a package is not installed, whereas require() only returns FALSE and
# lets the script fail later with a less obvious message.
library(ggplot2)
library(dplyr)
library(stringr)
Carga de dados
# Read the per-city temperature records from the CSV export.
# stringsAsFactors = FALSE keeps the text columns (dt, City, Latitude,
# Longitude, ...) as character vectors on R < 4.0 as well; the string
# handling further down (e.g. nchar()) expects character input.
df <- read.csv(
  "../data/GlobalClimate/GlobalLandTemperaturesByCity.csv",
  stringsAsFactors = FALSE
)
head(df)
Remoção de medidas nulas
# Keep only the rows that actually carry a temperature measurement.
df <- filter(df, !is.na(AverageTemperature))
head(df)
Preparação de dados formatando datas e valores numéricos
# Convert the date column to Date and derive the calendar year (numeric)
# into a new column, Ano.
df <- df %>%
  mutate(
    dt = as.Date(dt),
    Ano = as.numeric(format(dt, format = "%Y"))
  )
head(df)
Obtenção de dados consolidados anualmente
# Average the temperature readings for every (year, city) combination.
# NOTE(review): the grouping uses the city name only; if the same name occurs
# in more than one country those records would be averaged together -- confirm
# against the data.
df_anual <- aggregate(
  AverageTemperature ~ Ano + City,
  data = df,
  FUN = mean
)
# Preview the row-level data next to its yearly aggregate.
head(df)
head(df_anual)
Filtrando períodos de interesse
# Restrict the yearly averages to 2013.
df_um <- filter(df_anual, Ano == 2013)
head(df_um)
Aplicando um join de tabelas para obtenção dos dados necessários
# Attach the location metadata (Country, Latitude, Longitude) from the
# row-level data to each 2013 yearly average.
df_um <- df_um %>%
  left_join(df, by = c("City", "Ano")) %>%
  # Drop the per-record temperature brought in by the join and keep the
  # yearly mean under its original name (renamed by name, not by position).
  select(-AverageTemperature.y) %>%
  rename(AverageTemperature = AverageTemperature.x) %>%
  # The join returns one row for every record of the city in that year.
  # Collapse it to a single row per city location so later plots and merges
  # are not inflated by duplicates. The remaining per-record columns (dt,
  # AverageTemperatureUncertainty) hold the values of the first matching
  # record and are not yearly figures.
  distinct(City, Country, Latitude, Longitude, .keep_all = TRUE)
head(df_um)
Tratamento de coordenadas
# Coordinates are stored as text such as "42.59N" / "8.73W": a magnitude
# followed by a hemisphere letter. Split them into a numeric part and the
# letter, then build signed decimal degrees.

# Everything except the last character is the magnitude (end = -2 drops the
# trailing letter without needing nchar()).
df_um$Latitude_num <- as.numeric(str_sub(df_um$Latitude, 1, -2))
df_um$Longitude_num <- as.numeric(str_sub(df_um$Longitude, 1, -2))

# Extract the hemisphere letter. "[A-Za-z]" matches letters only; the former
# class "[aA-zZ]" spanned the range A-z, which also includes the characters
# [ \ ] ^ _ and the backtick.
df_um$Latitude_chr <- str_extract(df_um$Latitude, "[A-Za-z]+")
df_um$Longitude_chr <- str_extract(df_um$Longitude, "[A-Za-z]+")

# North and East are positive; any other hemisphere letter flips the sign.
df_um <- within(df_um, {
  Lat <- ifelse(Latitude_chr == "N", Latitude_num, -Latitude_num)
  Long <- ifelse(Longitude_chr == "E", Longitude_num, -Longitude_num)
})
head(df_um)
Plotando o mapa de temperaturas
# World map outline used as the background layer of the temperature maps.
mapamundial <- map_data("world")

# Plot every city as a point coloured by its 2013 mean temperature.
# - The point layer receives df_um through `data =` and refers to bare column
#   names inside aes(); writing df_um$col inside aes() bypasses ggplot2's
#   data handling.
# - The former bare geom_jitter() layer had neither data nor aesthetics, so it
#   drew nothing; it has been removed.
# - `limits` is spelled out in full instead of relying on partial matching of
#   `limit`.
ggplot() +
  geom_polygon(
    data = mapamundial,
    aes(x = long, y = lat, group = group),
    fill = "lightgray",
    colour = "white"
  ) +
  coord_fixed(1.3) +
  geom_point(
    data = df_um,
    aes(x = Long, y = Lat, color = AverageTemperature)
  ) +
  labs(x = " ", y = " ", subtitle = "2013") +
  theme_minimal() +
  scale_color_distiller(
    "Temperatura Atmosférica",
    palette = "Spectral",
    limits = c(-32, 40)
  )
# Build the 1913 table: yearly mean per city plus location metadata and
# signed decimal coordinates.
df_dois <- df_anual %>%
  filter(Ano == 1913) %>%
  left_join(df, by = c("City", "Ano")) %>%
  # Drop the per-record temperature brought in by the join and keep the
  # yearly mean under its original name (renamed by name, not by position).
  select(-AverageTemperature.y) %>%
  rename(AverageTemperature = AverageTemperature.x) %>%
  # The join returns one row for every record of the city in that year.
  # Collapse it to a single row per city location. The remaining per-record
  # columns (dt, AverageTemperatureUncertainty) hold the values of the first
  # matching record and are not yearly figures.
  distinct(City, Country, Latitude, Longitude, .keep_all = TRUE)

# Coordinates are stored as text such as "42.59N" / "8.73W". Everything except
# the last character is the magnitude (end = -2 drops the trailing letter).
df_dois$Latitude_num <- as.numeric(str_sub(df_dois$Latitude, 1, -2))
df_dois$Longitude_num <- as.numeric(str_sub(df_dois$Longitude, 1, -2))

# Extract the hemisphere letter. "[A-Za-z]" matches letters only; the former
# class "[aA-zZ]" spanned the range A-z, which also includes the characters
# [ \ ] ^ _ and the backtick.
df_dois$Latitude_chr <- str_extract(df_dois$Latitude, "[A-Za-z]+")
df_dois$Longitude_chr <- str_extract(df_dois$Longitude, "[A-Za-z]+")

# North and East are positive; any other hemisphere letter flips the sign.
df_dois <- within(df_dois, {
  Lat <- ifelse(Latitude_chr == "N", Latitude_num, -Latitude_num)
  Long <- ifelse(Longitude_chr == "E", Longitude_num, -Longitude_num)
})
# Plot every city as a point coloured by its 1913 mean temperature, on the
# same colour scale limits (-32 to 40) as the 2013 map so the two are
# comparable.
# - The point layer receives df_dois through `data =` and refers to bare
#   column names inside aes() rather than df_dois$col.
# - The former bare geom_jitter() layer had neither data nor aesthetics, so it
#   drew nothing; it has been removed.
# - `limits` is spelled out in full instead of relying on partial matching of
#   `limit`.
ggplot() +
  geom_polygon(
    data = mapamundial,
    aes(x = long, y = lat, group = group),
    fill = "lightgray",
    colour = "white"
  ) +
  coord_fixed(1.3) +
  geom_point(
    data = df_dois,
    aes(x = Long, y = Lat, color = AverageTemperature)
  ) +
  labs(x = " ", y = " ", subtitle = "1913") +
  theme_minimal() +
  scale_color_distiller(
    "Temperatura Atmosférica",
    palette = "Spectral",
    limits = c(-32, 40)
  )
Reunião dos dois datasets para obtenção da diferença entre as médias anuais em um século.
# Combine the 2013 and 1913 tables city by city and compute the change in the
# yearly mean temperature (2013 minus 1913).
#
# Each side is first reduced to one row per city location. Merging on City
# pairs every 2013 row of a city with every 1913 row of that city, so any
# duplicated rows on either side multiply in the result. The column layout of
# the merged table (.x = 2013, .y = 1913) is unchanged.
dfcomparativo <- merge(
  distinct(df_um, City, Country, Latitude, Longitude, .keep_all = TRUE),
  distinct(df_dois, City, Country, Latitude, Longitude, .keep_all = TRUE),
  by = "City",
  # Keep cities present in 2013 even without a 1913 counterpart; their
  # difference is NA.
  all.x = TRUE
)
dfcomparativo$DiferencaTemperaturaMedia <-
  dfcomparativo$AverageTemperature.x - dfcomparativo$AverageTemperature.y
glimpse(dfcomparativo)
## Rows: 355,788
## Columns: 28
## $ City <chr> "A Coruña", "A Coruña", "A Coruña", "A…
## $ Ano.x <dbl> 2013, 2013, 2013, 2013, 2013, 2013, 20…
## $ AverageTemperature.x <dbl> 13.98375, 13.98375, 13.98375, 13.98375…
## $ dt.x <date> 2013-01-01, 2013-01-01, 2013-01-01, 2…
## $ AverageTemperatureUncertainty.x <dbl> 0.377, 0.377, 0.377, 0.377, 0.377, 0.3…
## $ Country.x <chr> "Spain", "Spain", "Spain", "Spain", "S…
## $ Latitude.x <chr> "42.59N", "42.59N", "42.59N", "42.59N"…
## $ Longitude.x <chr> "8.73W", "8.73W", "8.73W", "8.73W", "8…
## $ Latitude_num.x <dbl> 42.59, 42.59, 42.59, 42.59, 42.59, 42.…
## $ Longitude_num.x <dbl> 8.73, 8.73, 8.73, 8.73, 8.73, 8.73, 8.…
## $ Latitude_chr.x <chr> "N", "N", "N", "N", "N", "N", "N", "N"…
## $ Longitude_chr.x <chr> "W", "W", "W", "W", "W", "W", "W", "W"…
## $ Long.x <dbl> -8.73, -8.73, -8.73, -8.73, -8.73, -8.…
## $ Lat.x <dbl> 42.59, 42.59, 42.59, 42.59, 42.59, 42.…
## $ Ano.y <dbl> 1913, 1913, 1913, 1913, 1913, 1913, 19…
## $ AverageTemperature.y <dbl> 12.96475, 12.96475, 12.96475, 12.96475…
## $ dt.y <date> 1913-01-01, 1913-02-01, 1913-03-01, 1…
## $ AverageTemperatureUncertainty.y <dbl> 0.598, 0.652, 0.676, 0.745, 0.604, 0.5…
## $ Country.y <chr> "Spain", "Spain", "Spain", "Spain", "S…
## $ Latitude.y <chr> "42.59N", "42.59N", "42.59N", "42.59N"…
## $ Longitude.y <chr> "8.73W", "8.73W", "8.73W", "8.73W", "8…
## $ Latitude_num.y <dbl> 42.59, 42.59, 42.59, 42.59, 42.59, 42.…
## $ Longitude_num.y <dbl> 8.73, 8.73, 8.73, 8.73, 8.73, 8.73, 8.…
## $ Latitude_chr.y <chr> "N", "N", "N", "N", "N", "N", "N", "N"…
## $ Longitude_chr.y <chr> "W", "W", "W", "W", "W", "W", "W", "W"…
## $ Long.y <dbl> -8.73, -8.73, -8.73, -8.73, -8.73, -8.…
## $ Lat.y <dbl> 42.59, 42.59, 42.59, 42.59, 42.59, 42.…
## $ DiferencaTemperaturaMedia <dbl> 1.019, 1.019, 1.019, 1.019, 1.019, 1.0…
Diferença mínima
# Smallest change in yearly mean temperature. na.rm = TRUE ignores cities that
# have no 1913 counterpart (the merge keeps them with an NA difference), which
# would otherwise turn the result into NA.
min(dfcomparativo$DiferencaTemperaturaMedia, na.rm = TRUE)
## [1] -1.443708
Diferença máxima
# Largest change in yearly mean temperature. na.rm = TRUE ignores cities that
# have no 1913 counterpart (the merge keeps them with an NA difference), which
# would otherwise turn the result into NA.
max(dfcomparativo$DiferencaTemperaturaMedia, na.rm = TRUE)
## [1] 4.419583
# Plot every city at its 2013 coordinates, coloured by the change in yearly
# mean temperature between 1913 and 2013.
# - The point layer receives dfcomparativo through `data =` and refers to bare
#   column names inside aes() rather than dfcomparativo$col.
# - The former bare geom_jitter() layer had neither data nor aesthetics, so it
#   drew nothing; it has been removed.
ggplot() +
  geom_polygon(
    data = mapamundial,
    aes(x = long, y = lat, group = group),
    fill = "lightgray",
    colour = "white"
  ) +
  coord_fixed(1.3) +
  geom_point(
    data = dfcomparativo,
    aes(x = Long.x, y = Lat.x, color = DiferencaTemperaturaMedia)
  ) +
  labs(x = " ", y = " ", subtitle = "2013 - 1913") +
  theme_minimal() +
  scale_color_distiller(
    "Diferença na Temperatura Atmosférica",
    palette = "Spectral"
  )