project 3

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
# Read the bird-migration table from the project's data/ folder into `data`.
data <- read_csv(file = "data/Bird_Migration_Data_with_Origin.csv")
Rows: 10000 Columns: 42
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (20): Bird_ID, Species, Region, Habitat, Weather_Condition, Migration_Re...
dbl (22): Start_Latitude, Start_Longitude, End_Latitude, End_Longitude, Flig...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Distribution of the response variable (average speed) over the full data set.
hist(data$Average_Speed_kmph)

# Full-data linear model: average speed regressed on the weather and altitude
# covariates. Columns are named in the formula and resolved through
# `data = data` instead of `data$col` terms, so coefficient names are clean
# and predict(mod, newdata = ...) works on new observations.
mod <- lm(
  Average_Speed_kmph ~ Temperature_C + Wind_Speed_kmph + `Humidity_%` +
    Pressure_hPa + Visibility_km + Max_Altitude_m + Min_Altitude_m,
  data = data
)
# 2 x 2 layout so the lm diagnostic plots share a single page.
par(mfrow = c(2, 2))
plot(mod)

# Distributions of three predictors (temperature, wind speed, humidity).
# The 2 x 2 layout leaves the fourth panel empty.
par(mfrow=c(2,2))
hist(data$Temperature_C)
hist(data$Wind_Speed_kmph)
hist(data$`Humidity_%`)

# Subset used for the rest of the analysis: hawks in Africa whose recorded
# migration reason is climate change, reduced to the response and the seven
# numeric covariates. Column order matters: later code indexes data2 by
# position.
data2 <- data %>%
  filter(
    Species == "Hawk",
    Migration_Reason == "Climate Change",
    Region == "Africa"
  ) %>%
  select(
    Average_Speed_kmph,
    Wind_Speed_kmph,
    `Humidity_%`,
    Pressure_hPa,
    Visibility_km,
    Max_Altitude_m,
    Min_Altitude_m,
    Temperature_C
  )

# List the columns kept in the subset (and their order).
colnames(data2)
[1] "Average_Speed_kmph" "Wind_Speed_kmph"    "Humidity_%"        
[4] "Pressure_hPa"       "Visibility_km"      "Max_Altitude_m"    
[7] "Min_Altitude_m"     "Temperature_C"     
# Distribution of the response variable within the filtered subset.
hist(data2$Average_Speed_kmph)

# Seven-predictor linear model of average speed, fitted on the data2 subset.
# NOTE(review): terms are kept as data2$col so the printed Call and
# coefficient names stay as they are; consider bare column names with
# `data = data2` if predict() on new data is ever needed.
mod1 <- lm(
  data2$Average_Speed_kmph ~
    data2$Temperature_C +
    data2$Wind_Speed_kmph +
    data2$`Humidity_%` +
    data2$Pressure_hPa +
    data2$Visibility_km +
    data2$Max_Altitude_m +
    data2$Min_Altitude_m
)
# Diagnostic plots for the subset model.
plot(mod1)

# Coefficient table, residual summary and overall F-test for the subset model.
summary(mod1)

Call:
lm(formula = data2$Average_Speed_kmph ~ data2$Temperature_C + 
    data2$Wind_Speed_kmph + data2$`Humidity_%` + data2$Pressure_hPa + 
    data2$Visibility_km + data2$Max_Altitude_m + data2$Min_Altitude_m)

Residuals:
     Min       1Q   Median       3Q      Max 
-16.2900  -2.6031   0.8267   3.7485  10.3640 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)   
(Intercept)           74.0669248 25.2583326   2.932  0.00486 **
data2$Temperature_C    0.0493236  0.0492116   1.002  0.32052   
data2$Wind_Speed_kmph  0.0310729  0.0412593   0.753  0.45454   
data2$`Humidity_%`    -0.0533046  0.0291101  -1.831  0.07240 . 
data2$Pressure_hPa    -0.0191934  0.0248622  -0.772  0.44336   
data2$Visibility_km    0.0452962  0.1241083   0.365  0.71651   
data2$Max_Altitude_m  -0.0008051  0.0002744  -2.934  0.00484 **
data2$Min_Altitude_m  -0.0006706  0.0027517  -0.244  0.80834   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.304 on 56 degrees of freedom
Multiple R-squared:  0.2086,    Adjusted R-squared:  0.1097 
F-statistic: 2.109 on 7 and 56 DF,  p-value: 0.05739
# Pairwise Pearson correlations between all columns of the subset, used to
# screen the predictors for collinearity.
cor(x = data2, use = "everything", method = "pearson")
                   Average_Speed_kmph Wind_Speed_kmph  Humidity_% Pressure_hPa
Average_Speed_kmph         1.00000000      0.04260860 -0.20604167  -0.13740614
Wind_Speed_kmph            0.04260860      1.00000000  0.09653367  -0.16104220
Humidity_%                -0.20604167      0.09653367  1.00000000  -0.17721906
Pressure_hPa              -0.13740614     -0.16104220 -0.17721906   1.00000000
Visibility_km              0.01329199     -0.21465036  0.02590460   0.04224999
Max_Altitude_m            -0.35506087      0.09567947 -0.01132669   0.16903101
Min_Altitude_m            -0.01238736     -0.09497654 -0.13439461   0.11451995
Temperature_C              0.13350080     -0.03364202 -0.13009044  -0.02803807
                   Visibility_km Max_Altitude_m Min_Altitude_m Temperature_C
Average_Speed_kmph    0.01329199    -0.35506087    -0.01238736    0.13350080
Wind_Speed_kmph      -0.21465036     0.09567947    -0.09497654   -0.03364202
Humidity_%            0.02590460    -0.01132669    -0.13439461   -0.13009044
Pressure_hPa          0.04224999     0.16903101     0.11451995   -0.02803807
Visibility_km         1.00000000     0.05658035     0.07157553    0.16596821
Max_Altitude_m        0.05658035     1.00000000     0.05231357    0.05987688
Min_Altitude_m        0.07157553     0.05231357     1.00000000    0.18349710
Temperature_C         0.16596821     0.05987688     0.18349710    1.00000000

No collinearity: the largest absolute correlation between any two predictors is about 0.21.

# Scatterplot matrix of every pair of columns in the subset, as a visual
# check alongside the correlation matrix.
pairs(data2)

No collinearity.

# Single-term fit: average speed explained by maximum altitude alone,
# followed by its ANOVA table.
aova <- aov(formula = data2$Average_Speed_kmph ~ data2$Max_Altitude_m)
summary(aova)
                     Df Sum Sq Mean Sq F value  Pr(>F)   
data2$Max_Altitude_m  1    251  250.97   8.944 0.00399 **
Residuals            62   1740   28.06                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Principal component analysis on the first seven columns of data2, centred
# but not rescaled. The argument is spelled out as `scale. = FALSE` (its real
# name) rather than relying on partial matching of `scale` and on the
# reassignable shorthand `F`; the result is numerically unchanged.
# NOTE(review): with unscaled inputs the altitude columns (metres) dominate:
# PC1 carries ~99% of the variance and loads almost entirely on
# Max_Altitude_m. Consider `scale. = TRUE`. Columns 1:7 also include the
# response (Average_Speed_kmph) and leave out Temperature_C; confirm that
# selection is intended.
pca1 <- prcomp(data2[, 1:7], scale. = FALSE)
summary(pca1)
Importance of components:
                             PC1      PC2      PC3      PC4      PC5   PC6
Standard deviation     2504.8138 250.4757 28.75598 22.60993 16.51005 5.513
Proportion of Variance    0.9898   0.0099  0.00013  0.00008  0.00004 0.000
Cumulative Proportion     0.9898   0.9997  0.99987  0.99995  0.99999 1.000
                         PC7
Standard deviation     4.965
Proportion of Variance 0.000
Cumulative Proportion  1.000
# Print the component standard deviations and the rotation (loadings) matrix.
pca1
Standard deviations (1, .., p=7):
[1] 2504.813788  250.475746   28.755977   22.609931   16.510050    5.513165
[7]    4.965077

Rotation (n x k) = (7 x 7):
                             PC1           PC2          PC3           PC4
Average_Speed_kmph -0.0007968324  0.0001458871 -0.002273844 -0.0630785022
Wind_Speed_kmph     0.0006505230 -0.0068685647 -0.158907229 -0.0010841464
Humidity_%         -0.0001081765 -0.0128502502 -0.380960469  0.9207031259
Pressure_hPa        0.0019089912  0.0121152805  0.910647467  0.3848666803
Visibility_km       0.0001269810  0.0015432027  0.006380980  0.0123162459
Max_Altitude_m      0.9999836394 -0.0053098185 -0.001588747 -0.0007240638
Min_Altitude_m      0.0052906330  0.9998051395 -0.017040923  0.0071487955
                             PC5           PC6           PC7
Average_Speed_kmph -0.0284305398  0.3783371963  0.9230752658
Wind_Speed_kmph    -0.9834647116  0.0648734444 -0.0573438243
Humidity_%          0.0577719138  0.0095664439  0.0598383092
Pressure_hPa       -0.1481685768  0.0097784760  0.0199714632
Visibility_km       0.0817172209  0.9232898732 -0.3750510651
Max_Altitude_m      0.0009187898  0.0001298459  0.0007857482
Min_Altitude_m     -0.0043354284 -0.0010294795  0.0005815082
# Scree plot of the component variances from pca1.
screeplot(pca1)