# Download the climate data set (semicolon-separated CSV).
clim <- read.csv(
  "https://userpage.fu-berlin.de/soga/data/raw-data/Climfrance.csv",
  sep = ";"
)

# Take a look at the variables in the file.
str(clim)
head(clim)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(clim)
}

# It appears that two of the variables (altitude, p_mean) are character and
# not numeric, so both have to be converted. Commas are stripped before the
# conversion (presumably thousands separators -- confirm against the raw file).
clim$altitude <- as.numeric(gsub(",", "", clim$altitude))
clim$p_mean <- as.numeric(gsub(",", "", clim$p_mean))

# Check again to confirm the conversion.
str(clim)
# To plot the station locations in France we need ggplot2 plus the map data
# packages. Install "maps" and "mapdata" only when they are missing, so that
# re-running the script does not reinstall them every time.
for (pkg in c("maps", "mapdata")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(ggplot2)
library(maps)
library(mapdata)

# Load the France map.
france_map <- map_data("france")
# Check the column names so that we can match them with the points from our
# data set.
head(france_map)

# Draw the map outline and overlay one point per row of clim.
ggplot() +
  geom_polygon(
    data = france_map,
    aes(x = long, y = lat, group = group),
    fill = "cyan", color = "black"
  ) +
  geom_point(
    data = clim,
    aes(x = lon, y = lat),
    color = "darkred", size = 2
  ) +
  labs(
    title = "Station Locations in France",
    x = "Longitude", y = "Latitude"
  ) +
  theme_minimal()
# Exercise 1
# Test latitude, longitude and altitude as independent variables for t_mean.
# But first we exclude the high mountain extremes by keeping only the first
# 34 rows in a new data frame.
climfranc <- clim[1:34, ]

# Fit the three-predictor linear model and print its summary.
model <- lm(t_mean ~ lat + lon + altitude, data = climfranc)
summary(model)
# Below are the coefficients from the model.
# lm(formula = t_mean ~ lat + lon + altitude, data = climfranc)
# Coefficients:
#               Estimate Std. Error t value Pr(>|t|)
# (Intercept) 37.2650364  2.6220099  14.212 7.29e-15 ***
# lat         -0.5339603  0.0557546  -9.577 1.24e-10 ***
# lon          0.0321010  0.0395728   0.811    0.424
# altitude    -0.0064139  0.0008688  -7.383 3.17e-08 ***
# What does this mean?
# lat: For a one-unit increase in latitude, the predicted annual mean temperature decreases by 0.53 degrees Celsius, holding longitude and altitude constant.
# lon: For a one-unit increase in longitude, the predicted annual mean temperature increases by 0.032 degrees Celsius, holding latitude and altitude constant.
# NOTE THAT THE LONGITUDE P-VALUE (0.424) IS NOT SIGNIFICANT.
# Exercise 2
# We exclude longitude because it does not significantly affect the model.
model2 <- lm(t_mean ~ lat + altitude, data = climfranc)
# Summarise the reduced model (model2), not the Exercise 1 model.
summary(model2)
# Residuals:
#      Min       1Q   Median       3Q      Max
# -1.79206 -0.27571 -0.00556  0.30536  2.71871
#
# Coefficients:
#               Estimate Std. Error t value Pr(>|t|)
# (Intercept) 37.9147567  2.4828724   15.27 5.68e-16 ***
# lat         -0.5465325  0.0532610  -10.26 1.72e-11 ***
# altitude    -0.0062643  0.0008443   -7.42 2.34e-08 ***
# Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Residual standard error: 0.7268 on 31 degrees of freedom
# Multiple R-squared:  0.8292, Adjusted R-squared:  0.8182
# F-statistic: 75.26 on 2 and 31 DF,  p-value: 1.268e-12
# The model results remain fairly similar, but there is a small change in the coefficients.
# Predict t_mean for Mont-Ventoux and Pic-du-Midi with the altitude and
# latitude from the data frame.
# predict() documents newdata as a data frame, so build data frames rather
# than plain lists.
new_dataM <- data.frame(altitude = 1212, lat = 44.16)
new_dataP <- data.frame(altitude = 2860, lat = 42.93)

# Prediction (with 95% prediction interval) for the first set of values.
# The interval type is spelled out instead of relying on partial matching.
pred_temp_M <- predict(
  model2,
  newdata = new_dataM, interval = "prediction", level = 0.95
)
pred_temp_M

# Prediction for the second set of values.
pred_temp_P <- predict(
  model2,
  newdata = new_dataP, interval = "prediction", level = 0.95
)
pred_temp_P
# The predicted mean for Mont-Ventoux is 6.17°C with a 95% prediction interval of [3.79°C, 8.54°C].
# Since the measured mean is 3.6°C, which lies outside this interval, our model is not accurate enough to reproduce the temperature for Mont-Ventoux.
# The predicted mean for Pic-du-Midi is -3.45°C with a 95% prediction interval of [-8.35°C, 1.45°C].
# So our prediction interval still covers the measured mean of -1.2°C.
# Exercise 3
# Install (only if missing) and load the scatterplot3d package.
if (!requireNamespace("scatterplot3d", quietly = TRUE)) {
  install.packages("scatterplot3d")
}
library(scatterplot3d)

# 3D scatter plot of t_mean against altitude and latitude.
# The stray trailing comma after `angle = 45` has been removed so no empty
# argument is passed to scatterplot3d().
scatterplot_3d <- with(
  climfranc,
  scatterplot3d(
    altitude, lat, t_mean,
    pch = 16, highlight.3d = TRUE, angle = 45
  )
)
# Add the regression plane of the two-predictor model to the plot.
scatterplot_3d$plane3d(model2)
# The linear model doesn't seem to fit all the points, especially data points with low altitude and latitude.