library(ggplot2)
library(dplyr)
library(sp)
library(ggmap)
library(plotly)

Data

These data come from a Kaggle dataset (the file GlobalLandTemperaturesByCity.csv). In this R Markdown file I’m interested in showing how the average land temperature evolves over time, as well as in a geographic representation of the data.

# Load the city-level temperature file, keep only the French cities and drop
# every row that contains an NA.
df <- read.csv("GlobalLandTemperaturesByCity.csv", sep = ",")
FR <- df %>%
  filter(Country == "France") %>%
  na.omit()

# Decode the `dt` column into a Date, then split it into three numeric
# columns: year, month and day.
FR$date <- as.Date(FR$dt)
FR[c("year", "month", "day")] <- lapply(
  c("%Y", "%m", "%d"),
  function(fmt) as.numeric(format(FR$date, fmt))
)
head(FR)
##            dt AverageTemperature AverageTemperatureUncertainty
## 1  1743-11-01              7.478                         1.866
## 6  1744-04-01             11.596                         2.044
## 7  1744-05-01             13.287                         1.791
## 8  1744-06-01             17.675                         1.733
## 9  1744-07-01             20.056                         1.825
## 11 1744-09-01             15.835                         1.834
##               City Country Latitude Longitude       date year month day
## 1  Aix En Provence  France   44.20N     4.47E 1743-11-01 1743    11   1
## 6  Aix En Provence  France   44.20N     4.47E 1744-04-01 1744     4   1
## 7  Aix En Provence  France   44.20N     4.47E 1744-05-01 1744     5   1
## 8  Aix En Provence  France   44.20N     4.47E 1744-06-01 1744     6   1
## 9  Aix En Provence  France   44.20N     4.47E 1744-07-01 1744     7   1
## 11 Aix En Provence  France   44.20N     4.47E 1744-09-01 1744     9   1

Breakdown per year

# Yearly summary: mean of AverageTemperature per year, together with the
# smallest and largest AverageTemperatureUncertainty seen in that year.
frData <- FR %>%
  group_by(year) %>%
  select(AverageTemperature, AverageTemperatureUncertainty) %>%
  summarise(
    avg_Temp = mean(AverageTemperature),
    minError = min(AverageTemperatureUncertainty),
    maxError = max(AverageTemperatureUncertainty)
  ) %>%
  as.data.frame()

# Scatter plot of the yearly mean (point size = largest uncertainty of the
# year) with a red smoothed trend line on top.
plot <- ggplot(data = frData, aes(x = year, y = avg_Temp)) +
  geom_point(aes(size = maxError), alpha = .75) +
  geom_smooth(color = "red")
print(plot)

# Same yearly chart as the ggplot version, drawn with plot_ly to try plotly
# inside an R Markdown document: marker size follows maxError, marker colour
# follows minError, and the hover text shows maxError.
plot_ly(
  frData,
  x = frData$year,
  y = frData$avg_Temp,
  mode = "markers",
  size = frData$maxError,
  color = frData$minError,
  text = paste("maxError : ", frData$maxError)
)

Breakdown per city

# Per-city summary: mean temperature, mean uncertainty and the city's
# coordinates.
#
# Latitude/Longitude are stored as text such as "44.20N" / "4.47E" (decimal
# degrees followed by a hemisphere letter). Calling mean() directly on that
# text returns NA with a warning, so the text is first converted to signed
# decimal degrees (south and west negative) and only then averaged.
FRCity <- FR %>%
  group_by(City) %>%
  summarise(
    avg_Temp = mean(AverageTemperature),
    avg_Uncertainty = mean(AverageTemperatureUncertainty),
    LAT = mean(
      as.numeric(sub("[NSns]$", "", as.character(Latitude))) *
        ifelse(grepl("[Ss]$", as.character(Latitude)), -1, 1)
    ),
    LON = mean(
      as.numeric(sub("[EWew]$", "", as.character(Longitude))) *
        ifelse(grepl("[Ww]$", as.character(Longitude)), -1, 1)
    )
  ) %>%
  as.data.frame()
#define functions to modify the latitude/longitude columns to be plotted later
# Convert a longitude written as "<decimal degrees><E|W>" (e.g. "4.47E",
# "122.03W") into signed decimal degrees: east is positive, west is negative.
#
# x: character vector or factor of longitudes.
# Returns a numeric vector the same length as x; entries that cannot be
# parsed become NA (as.numeric() emits its usual coercion warning).
#
# The digits after the point are a decimal fraction of a degree, not
# arc-minutes, so the value is parsed directly instead of being rebuilt as a
# degrees/minutes string. This also handles longitudes with two or three
# digits before the decimal point.
convertLon <- function(x) {
    x <- trimws(as.character(x))
    isWest <- grepl("[Ww]$", x)
    val <- as.numeric(sub("[EWew]$", "", x))
    val * ifelse(isWest, -1, 1)
}

# Convert a latitude written as "<decimal degrees><N|S>" (e.g. "44.20N",
# "8.84S") into signed decimal degrees: north is positive, south is negative.
#
# x: character vector or factor of latitudes.
# Returns a numeric vector the same length as x; entries that cannot be
# parsed become NA (as.numeric() emits its usual coercion warning).
#
# The digits after the point are a decimal fraction of a degree, not
# arc-minutes, and the hemisphere letter is honoured rather than assumed to
# be "N".
convertLat <- function(x) {
    x <- trimws(as.character(x))
    isSouth <- grepl("[Ss]$", x)
    val <- as.numeric(sub("[NSns]$", "", x))
    val * ifelse(isSouth, -1, 1)
}
# Build a per-city lookup table from the records dated 2010-01-01
# (presumably one row per city; verify that every city has a record on that
# date), and add numeric latitude/longitude columns.
tempo <- FR[FR$date == as.Date("2010-01-01"), ]

# vapply() pins the result to one number per row, so the new columns stay
# numeric even when the selection above is empty (sapply() would return an
# empty list in that case).
tempo$newLat <- vapply(tempo$Latitude, convertLat, numeric(1))
tempo$Longitude2 <- as.character(tempo$Longitude)
tempo$newLon <- vapply(tempo$Longitude2, convertLon, numeric(1))

# Distinct city names found on that date.
cityList <- unique(tempo$City)

# Geocode every city in cityList and stack the answers into `init`, one row
# per city, with the city name attached.
#
# lapply() + bind_rows() replaces the previous `for (city in 2:length(...))`
# loop, which iterated over c(2, 1) when cityList held a single city and grew
# `init` with rbind() on every pass. Local names are used so the `tempo` data
# frame built earlier is no longer overwritten by the last geocoding result.
cityNames <- as.character(cityList)
stopifnot(length(cityNames) > 0)

geocoded <- lapply(cityNames, function(cityName) {
  location <- geocode(cityName)
  location$name <- cityName
  location
})
init <- bind_rows(geocoded)

#merge the DF with cities properties with the DF with data
# Keep only the coordinate and name columns, in a fixed order, before
# renaming them; geocode() is expected to return columns `lon` and `lat`
# (its default "latlon" output) - confirm against the installed ggmap.
init <- init[, c("lon", "lat", "name")]
colnames(init) <- c("newLon", "newLat", "City")
tempo2 <- merge(FRCity, init, by = "City")