library(ggplot2)
library(dplyr)
library(sp)
library(ggmap)
library(plotly)
# These data come from a Kaggle dataset, described here. In this R Markdown file I'm interested in representing the evolution of the average land temperature over time, as well as a geographic representation.
# Load the data, keep only the French cities, drop incomplete rows, and
# create new columns holding the parsed date and its year, month and day.
df <- read.csv("GlobalLandTemperaturesByCity.csv", sep = ",")
FR <- df %>%
  filter(Country == "France") %>%
  na.omit()
FR[["date"]] <- as.Date(FR[["dt"]])
# One numeric column per date component, extracted via its format code
FR[c("year", "month", "day")] <- lapply(
  c("%Y", "%m", "%d"),
  function(fmt) as.numeric(format(FR[["date"]], fmt))
)
head(FR)
## dt AverageTemperature AverageTemperatureUncertainty
## 1 1743-11-01 7.478 1.866
## 6 1744-04-01 11.596 2.044
## 7 1744-05-01 13.287 1.791
## 8 1744-06-01 17.675 1.733
## 9 1744-07-01 20.056 1.825
## 11 1744-09-01 15.835 1.834
## City Country Latitude Longitude date year month day
## 1 Aix En Provence France 44.20N 4.47E 1743-11-01 1743 11 1
## 6 Aix En Provence France 44.20N 4.47E 1744-04-01 1744 4 1
## 7 Aix En Provence France 44.20N 4.47E 1744-05-01 1744 5 1
## 8 Aix En Provence France 44.20N 4.47E 1744-06-01 1744 6 1
## 9 Aix En Provence France 44.20N 4.47E 1744-07-01 1744 7 1
## 11 Aix En Provence France 44.20N 4.47E 1744-09-01 1744 9 1
# Yearly summary: mean of AverageTemperature, plus the smallest and largest
# AverageTemperatureUncertainty seen within each year.
frData <- FR %>%
  group_by(year) %>%
  select(AverageTemperature, AverageTemperatureUncertainty) %>%
  summarise(
    avg_Temp = mean(AverageTemperature),
    minError = min(AverageTemperatureUncertainty),
    maxError = max(AverageTemperatureUncertainty)
  ) %>%
  as.data.frame()
# Yearly means as points (size mapped to maxError) with a red smoother on top
plot <- ggplot(data = frData, aes(x = year, y = avg_Temp)) +
  geom_point(aes(size = maxError), alpha = .75) +
  geom_smooth(color = "red")
print(plot)
# Same data as the ggplot above, drawn with plot_ly to try it out inside an
# R Markdown file: marker size follows maxError, colour follows minError.
plot_ly(
  frData,
  x = frData$year,
  y = frData$avg_Temp,
  text = paste("maxError : ", frData$maxError),
  mode = "markers",
  size = frData$maxError,
  color = frData$minError
)
# Aggregate per city: mean temperature, mean uncertainty and mean position.
# Latitude/Longitude are stored as text such as "44.20N" / "4.47E", so they are
# parsed into signed decimal degrees before averaging; calling mean() on the
# raw text yields NA with a warning.
FRCity <- local({
  # "<number><N|S|E|W>" -> signed number (S and W become negative)
  to_degrees <- function(coord) {
    coord <- as.character(coord)
    magnitude <- as.numeric(sub("[NSEW]$", "", coord))
    ifelse(grepl("[SW]$", coord), -magnitude, magnitude)
  }
  FR %>%
    group_by(City) %>%
    summarise(
      avg_Temp = mean(AverageTemperature),
      avg_Uncertainty = mean(AverageTemperatureUncertainty),
      LAT = mean(to_degrees(Latitude)),
      LON = mean(to_degrees(Longitude))
    ) %>%
    as.data.frame()
})
#define functions to modify the latitude/longitude columns to be plotted later
# Convert a longitude label such as "4.47E" or "10.33W" to signed decimal
# degrees.
#
# x: character (or factor) vector made of a decimal number followed by a
#    one-letter hemisphere suffix.
# Returns a numeric vector of the same length; values suffixed with "W" are
# negative, everything else is positive.
#
# The numeric part is read as a whole instead of being sliced at fixed
# character positions, so two- and three-digit degrees ("10.33E") work, and the
# fractional part is kept as a decimal fraction rather than being treated as
# minutes.
convertLon <- function(x) {
  x <- trimws(as.character(x))
  westEast <- substr(x, nchar(x), nchar(x))
  val <- as.numeric(substr(x, 1, nchar(x) - 1))
  ifelse(westEast == "W", -1 * val, val)
}
# Convert a latitude label such as "44.20N" or "5.63S" to signed decimal
# degrees.
#
# x: character (or factor) vector made of a decimal number followed by a
#    one-letter hemisphere suffix.
# Returns a numeric vector of the same length; values suffixed with "S" are
# negative, everything else is positive.
#
# The hemisphere letter is now honoured (the previous version always assumed
# "N"), the degree width is no longer fixed at two characters, and the
# fractional part is kept as a decimal fraction rather than being treated as
# minutes.
convertLat <- function(x) {
  x <- trimws(as.character(x))
  northSouth <- substr(x, nchar(x), nchar(x))
  val <- as.numeric(substr(x, 1, nchar(x) - 1))
  ifelse(northSouth == "S", -1 * val, val)
}
# Take the rows of one date (2010-01-01) as a snapshot holding one record per
# city, attach converted latitude/longitude columns, and collect the distinct
# city names.
tempo <- FR[FR$date == "2010-01-01", ]
tempo[["newLat"]] <- sapply(tempo[["Latitude"]], convertLat)
# Longitude is turned into plain text before being handed to convertLon
tempo[["Longitude2"]] <- as.character(tempo[["Longitude"]])
tempo[["newLon"]] <- sapply(tempo[["Longitude2"]], convertLon)
cityList <- unique(tempo[["City"]])
# Geocode every city in a single vectorised call and label each result row
# with its city name. This replaces an index loop over 2:length(cityList),
# which iterated over c(2, 1) when only one city was present, grew `init` with
# rbind() on every pass, and overwrote `tempo` as a side effect.
# NOTE(review): only the bare city name is sent to the geocoder; ambiguous
# names may resolve outside France - confirm the results.
init <- geocode(as.character(cityList))
init$name <- as.character(cityList)
# Rename the three columns of the geocoding table so that it shares the "City"
# key with the per-city averages, then join the two tables on that key.
names(init) <- c("newLon", "newLat", "City")
tempo2 <- merge(x = FRCity, y = init, by = "City")