# Task 2

# Load the semicolon-separated CSV export; decimals in the file use a comma.
data <- read.table(
  file = "./LocationTest.csv",
  header = TRUE,
  sep = ";",
  dec = ","
)

# Preview the first rows to confirm the columns were parsed as expected.
head(data)

# Collect the response (Y) and the two predictors (X1, X2) under short names.
dataFrame <- data.frame(
  Y = data[["winddirectionSum"]],
  X1 = data[["winddirection_10m"]],
  X2 = data[["winddirection_100m"]]
)
dataFrame

# Regress the summed wind direction on the 100 m and 10 m wind directions.
model <- lm(
  formula = winddirectionSum ~ winddirection_100m + winddirection_10m,
  data = data
)

# Show coefficient estimates, standard errors and goodness-of-fit statistics.
summary(model)

## 
## Call:
## lm(formula = winddirectionSum ~ winddirection_100m + winddirection_10m, 
##     data = data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -8.351e-11  7.000e-15  1.200e-14  1.700e-14  4.357e-12 
## 
## Coefficients:
##                      Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)        -2.393e-12  2.351e-14 -1.018e+02   <2e-16 ***
## winddirection_100m  1.000e+00  2.330e-16  4.292e+15   <2e-16 ***
## winddirection_10m   1.000e+00  2.372e-16  4.216e+15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.922e-13 on 10996 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.904e+32 on 2 and 10996 DF,  p-value: < 2.2e-16

# Scatter plot of the 100 m wind direction against the 10 m wind direction.
plot(
  data$winddirection_10m, data$winddirection_100m,
  col = "blue",
  main = "Scatter Plot with Regression Line",
  xlab = "winddirection_10m",
  ylab = "winddirection_100m"
)

# abline() can only draw a one-predictor fit. Passing the two-predictor
# `model` makes it use just the intercept and the first slope (with the
# warning "only using the first two of 3 regression coefficients"), which is
# not the regression of the plotted variables. Fit the simple regression that
# matches the plot axes and draw that line instead.
line_fit <- lm(winddirection_100m ~ winddirection_10m, data = data)
abline(line_fit, col = "red")

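# The intercept is formally statistically significant (p-value < 2e-16), but
# its estimate (-2.393e-12) is numerically zero. Because the residuals are on
# the order of 1e-13, the standard errors are also close to zero, so this
# t-test reflects floating-point noise rather than a meaningful offset.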

# The coefficients for both wind directions are highly significant
# (p-value < 2e-16). Each coefficient of 1 means that winddirectionSum
# increases by 1 unit for every 1-unit increase in winddirection_100m or
# winddirection_10m, holding the other predictor fixed.

# A very low residual standard error (9.922e-13) indicates that the fitted
# values reproduce the observed data almost exactly.

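# Multiple R-squared and adjusted R-squared are both equal to 1, which means
# that the model fits the data perfectly. A perfect fit on real data is a
# warning sign: it can stem from problems such as multicollinearity or
# overfitting, and here the unit coefficients and ~1e-13 residuals suggest
# that winddirectionSum is simply the exact sum of the two predictors.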

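# A very high F-statistic (1.904e+32, p-value < 2.2e-16) indicates that the
# predictors are jointly statistically significant, i.e. the model explains
# far more of the variation in the dependent variable than an intercept-only
# model.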