## Loading required package: psych
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x ggplot2::%+%()   masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

1. Attach data frame: Crow

1a. clean up data

1a.i. filter
Crow<-Crow%>%filter(Prop.Blocked !=0.44600)
1a.ii. convert aviary, chick.id, and survived to factors
Crow$Aviary<-as.factor(Crow$Aviary)
Crow$Chick.ID<-as.factor(Crow$Chick.ID)
Crow$Survived<-as.factor(Crow$Survived)
1a.iii. clean up dates

I have this on eval=F because it wouldn’t run for me

Crow<-Crow%>%separate(Date, c("day", "month"), sep = "-")
Crow$year<-"2020"

Crow$year<-as.numeric(Crow$year)
Crow<-Crow%>%mutate(date1=paste(Crow$day, Crow$month, Crow$year), sep="-")
Crow$date1<-dmy(Crow$date1)
Crow$sep<-NULL

2. Build PCA with Crow

2a. Create data frame with behavioral variables on which we’ll run the PCA: Crow.pca

Crow.pca<-Crow%>%select("Freq.Aggro", "Freq.Allofeed", "Freq.Allopreen", "Freq.Beg", "Freq.Clean", "Freq.Feed", "Freq.Nest", "Prop.On", "Freq.Preen")

2b. Run the PCA with three factors: Crow.pca.1

Crow.pca.1<-psych::principal(Crow.pca, rotate="varimax", nfactors=3, scores=TRUE)
Crow.pca.1

2c. Parallel analysis on Crow.pca.1

fa.parallel(Crow.pca, fm = "mle", fa = "pc", nfactors = 3, n.iter = 1000)

2d. Run the PCA with four factors (per the parallel analysis)

Crow.pca.1<-psych::principal(Crow.pca, rotate = "varimax", nfactors = 4, scores = TRUE)
Crow.pca.1
2d.i. biplots of the Crow.pca.1 with four factors
biplot(Crow.pca.1, label=TRUE)
biplot(Crow.pca.1, choose = c(1,2))
biplot(Crow.pca.1, choose = c(1,3))
biplot(Crow.pca.1, choose = c(1,4))
biplot(Crow.pca.1, choose = c(2,3))
biplot(Crow.pca.1, choose = c(2,4))
biplot(Crow.pca.1, choose = c(3,4))
2d.ii. scores
scores.1<-Crow.pca.1$scores
summary(scores.1)
2d.iii. plot scores
plot(scores.1)

3. Create new data frame with the scores

3a. appended scores to Crow: dat.w.pca.scores

dat.w.pca.scores<-cbind(Crow, scores.1)

3b. rename components to reflect biology: dat.w.pca.scores

dat.w.pcs.scores<-dat.w.pca.scores%>%rename("Parental.Attentive"=RC1, "Housekeeping"=RC2, "Allopreen"=RC3, "Aggression"=RC4)
names(dat.w.pcs.scores)

4. Relationship between scores, survival, aviary, chick.id

4a. Plot scores against aviary

plot(dat.w.pcs.scores$Parental.Attentive ~ dat.w.pca.scores$Aviary)

4b. plot survived against avg score per chick (chick is covariate)

but this code doesn’t take average score per chick

plot(dat.w.pca.scores$Survived~dat.w.pca.scores$RC1)

5. Build PCA with Crow but omitting preen

NoPreen.pca<-Crow%>%select("Freq.Aggro", "Freq.Allofeed", "Freq.Allopreen", "Freq.Beg", "Freq.Clean", "Freq.Feed", "Freq.Nest", "Prop.On")

5a. run the PCA with three factors: NoPreen.pca.1

NoPreen.pca.1<-psych::principal(NoPreen.pca, rotate="varimax", nfactors=3, scores=TRUE)
NoPreen.pca.1
## Principal Components Analysis
## Call: psych::principal(r = NoPreen.pca, nfactors = 3, rotate = "varimax", 
##     scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                  RC1   RC2   RC3   h2   u2 com
## Freq.Aggro     -0.08  0.17 -0.51 0.30 0.70 1.3
## Freq.Allofeed   0.85  0.01  0.16 0.74 0.26 1.1
## Freq.Allopreen -0.02  0.05  0.80 0.64 0.36 1.0
## Freq.Beg        0.09  0.88  0.07 0.79 0.21 1.0
## Freq.Clean      0.80  0.20 -0.31 0.78 0.22 1.4
## Freq.Feed       0.08  0.82 -0.25 0.74 0.26 1.2
## Freq.Nest       0.58  0.46  0.40 0.71 0.29 2.7
## Prop.On         0.63 -0.55  0.29 0.79 0.21 2.4
## 
##                        RC1  RC2  RC3
## SS loadings           2.12 2.04 1.33
## Proportion Var        0.27 0.25 0.17
## Cumulative Var        0.27 0.52 0.69
## Proportion Explained  0.39 0.37 0.24
## Cumulative Proportion 0.39 0.76 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.1 
##  with the empirical chi square  85.97  with prob <  8.3e-16 
## 
## Fit based upon off diagonal values = 0.87

5b. run the parallel analysis on NoPreen.pca.1

fa.parallel(NoPreen.pca, fm = "mle", fa = "pc", nfactors = 3, n.iter = 1000)

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  2

5c. run the PCA with four factors: NoPreen.pca.1

Even thought the parallel analysis suggested two components, I thought the 80% variance provided by 4 components was preferred over the ~50% with just two components.

NoPreen.pca.1<-psych::principal(NoPreen.pca, rotate="varimax", nfactors=4, scores=TRUE)
NoPreen.pca.1
## Principal Components Analysis
## Call: psych::principal(r = NoPreen.pca, nfactors = 4, rotate = "varimax", 
##     scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                  RC1   RC2   RC3   RC4   h2    u2 com
## Freq.Aggro     -0.04  0.07  0.00  0.97 0.95 0.048 1.0
## Freq.Allofeed   0.85 -0.01  0.11 -0.09 0.74 0.257 1.1
## Freq.Allopreen  0.03 -0.06  0.94  0.01 0.90 0.103 1.0
## Freq.Beg        0.10  0.89  0.09 -0.06 0.81 0.191 1.1
## Freq.Clean      0.80  0.20 -0.26  0.19 0.79 0.214 1.5
## Freq.Feed       0.08  0.83 -0.15  0.16 0.74 0.256 1.2
## Freq.Nest       0.60  0.43  0.36 -0.18 0.71 0.292 2.8
## Prop.On         0.63 -0.57  0.17 -0.20 0.79 0.212 2.4
## 
##                        RC1  RC2  RC3  RC4
## SS loadings           2.13 2.03 1.17 1.09
## Proportion Var        0.27 0.25 0.15 0.14
## Cumulative Var        0.27 0.52 0.67 0.80
## Proportion Explained  0.33 0.32 0.18 0.17
## Cumulative Proportion 0.33 0.65 0.83 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 4 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.07 
##  with the empirical chi square  46.83  with prob <  6.8e-11 
## 
## Fit based upon off diagonal values = 0.93
5c.i. biplots of the NoPreen.pca.1 with two factors
biplot(NoPreen.pca.1, label=TRUE)

biplot(NoPreen.pca.1, choose = c(1,2))

biplot(NoPreen.pca.1, choose = c(1,3))

biplot(NoPreen.pca.1, choose = c(1,4))

biplot(NoPreen.pca.1, choose = c(2,3))

biplot(NoPreen.pca.1, choose = c(2,4))

biplot(NoPreen.pca.1, choose = c(3,4))

##### 5c.ii. scores

NoPreen.scores.1<-NoPreen.pca.1$scores
summary(NoPreen.scores.1)
##       RC1               RC2               RC3               RC4          
##  Min.   :-1.6253   Min.   :-1.4606   Min.   :-1.2160   Min.   :-0.74308  
##  1st Qu.:-0.7336   1st Qu.:-0.8521   1st Qu.:-0.4866   1st Qu.:-0.27626  
##  Median :-0.1397   Median :-0.2316   Median :-0.2620   Median :-0.15478  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000  
##  3rd Qu.: 0.7415   3rd Qu.: 0.6201   3rd Qu.: 0.1462   3rd Qu.: 0.01536  
##  Max.   : 2.5559   Max.   : 2.4601   Max.   : 5.6409   Max.   :10.13947
5c.iii. plot scores
plot(NoPreen.scores.1)

6. Create new data frame with the scores

6a. appended scores to Crow: NP.w.pca.scores

NP.w.pca.scores<-cbind(Crow, NoPreen.scores.1)

6b. rename components to reflect biology: NP.w.pcs.scores

NP.w.pcs.scores<-NP.w.pca.scores%>%rename("Housekeeping"=RC1, "Parental.Attentive"=RC2, "Allopreen" = RC3, "Aggression" = RC4)
names(NP.w.pcs.scores)
##  [1] "Aviary"             "Chick.ID"           "Date"              
##  [4] "Survived"           "Days.Survived"      "Fledged"           
##  [7] "Age"                "Total.length"       "Freq.Aggro"        
## [10] "Freq.Allofeed"      "Freq.Allopreen"     "Dur.Blocked"       
## [13] "Prop.Blocked"       "Freq.Beg"           "Freq.Clean"        
## [16] "Freq.Feed"          "Freq.Nest"          "Dur.Off"           
## [19] "Prop.Off"           "Dur.On"             "Prop.On"           
## [22] "Freq.Preen"         "Fem.Clean"          "Fem.Feed"          
## [25] "Fem.Nest"           "Fem.Dur.Off"        "Fem.Prop.Off"      
## [28] "Fem.Dur.On"         "Fem.Prop.On"        "Fem.Preen"         
## [31] "Male.Allofeed"      "Male.Allopreen"     "Male.Clean"        
## [34] "Male.Feed"          "Male.Nest"          "Male.Dur.Off"      
## [37] "Male.Prop.Off"      "Male.Dur.On"        "Male.Prop.On"      
## [40] "Male.Preen"         "Housekeeping"       "Parental.Attentive"
## [43] "Allopreen"          "Aggression"

7. Relationship between scores, survival, aviary, chick.id

7a. Plot Attentiveness scores against aviary

plot(NP.w.pcs.scores$Parental.Attentive ~ NP.w.pcs.scores$Aviary)

7b. Plot Housekeeping scores against aviary

plot(NP.w.pcs.scores$Housekeeping ~ NP.w.pcs.scores$Aviary)

7c. plot survived against avg Attentiveness score per chick (chick is covariate)

7d. plot survived against avg Housekeeping score per chick (chick is covariate)

8. Run a general linear model of survival as function of PCA scores

8a. create behavioral variables that are just for first two weeks

Allofeed = NP.w.pcs.scores$Freq.Allofeed[NP.w.pcs.scores$Age<15]
Allopreen = NP.w.pcs.scores$Freq.Allopreen[NP.w.pcs.scores$Age<15]
Aggro = NP.w.pcs.scores$Freq.Aggro[NP.w.pcs.scores$Age<15]
Beg = NP.w.pcs.scores$Freq.Beg[NP.w.pcs.scores$Age<15]
Clean = NP.w.pcs.scores$Freq.Clean[NP.w.pcs.scores$Age<15]
Feed = NP.w.pcs.scores$Freq.Feed[NP.w.pcs.scores$Age<15]
Nest = NP.w.pcs.scores$Freq.Nest[NP.w.pcs.scores$Age<15]
Feather = NP.w.pcs.scores$Freq.Preen[NP.w.pcs.scores$Age<15]
OnChick = NP.w.pcs.scores$Prop.On[NP.w.pcs.scores$Age<15]

8b. average the scores for the Parental Attentive component for each chick

Parent.Aalii = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Chick.ID == "AL337" & NP.w.pcs.scores$Age < 15])
Parent.AV_2 = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Chick.ID == "AL344" & NP.w.pcs.scores$Age < 15])
Parent.AV_A = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Chick.ID == "AL343" & NP.w.pcs.scores$Age < 15])
Parent.AV_A.2 = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Chick.ID == "AL341" & NP.w.pcs.scores$Age < 15])
Parent.AV_5 = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Chick.ID == "AL346" & NP.w.pcs.scores$Age < 15])
Parent.AV_B = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Chick.ID == "AL334" & NP.w.pcs.scores$Age < 15])
Parent.AV_C = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Chick.ID == "AL345" & NP.w.pcs.scores$Age < 15])
Parent.AV_D = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Chick.ID == "AL333" & NP.w.pcs.scores$Age < 15])
Parent.Alani = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Aviary == "Alani" & NP.w.pcs.scores$Age < 15])
Parent.Koa4 = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Chick.ID == "AL340" & NP.w.pcs.scores$Age < 15])
Parent.Ohia = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Chick.ID == "AL335" & NP.w.pcs.scores$Age < 15])
Parent.Ohia.2 = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Chick.ID == "AL336" & NP.w.pcs.scores$Age < 15])
Parent.Pukiawe = mean(NP.w.pcs.scores$Parental.Attentive[NP.w.pcs.scores$Chick.ID == "AL338" & NP.w.pcs.scores$Age < 15])
8b.i.create a vector with all of the means for the Parent Attentiveness component
Parent.means = c(Parent.Aalii, Parent.AV_2, Parent.AV_5, Parent.AV_A, Parent.AV_A.2, Parent.AV_B, Parent.AV_C, Parent.AV_D, Parent.Koa4, Parent.Ohia, Parent.Ohia.2, Parent.Pukiawe)

8c. average the scores for the Housekeeping component for each chick

Housekeeping.Aalii = mean(NP.w.pcs.scores$Housekeeping[NP.w.pcs.scores$Chick.ID == "AL337" & NP.w.pcs.scores$Age < 15])
Housekeeping.AV_2 = mean(NP.w.pcs.scores$Housekeeping[NP.w.pcs.scores$Chick.ID == "AL344" & NP.w.pcs.scores$Age < 15])
Housekeeping.AV_A = mean(NP.w.pcs.scores$Housekeeping[NP.w.pcs.scores$Chick.ID == "AL343" & NP.w.pcs.scores$Age < 15])
Housekeeping.AV_A.2 = mean(NP.w.pcs.scores$Housekeeping[NP.w.pcs.scores$Chick.ID == "AL341" & NP.w.pcs.scores$Age < 15])
Housekeeping.AV_5 = mean(NP.w.pcs.scores$Housekeeping[NP.w.pcs.scores$Chick.ID == "AL346" & NP.w.pcs.scores$Age < 15])
Housekeeping.AV_B = mean(NP.w.pcs.scores$Housekeeping[NP.w.pcs.scores$Chick.ID == "AL334" & NP.w.pcs.scores$Age < 15])
Housekeeping.AV_C = mean(NP.w.pcs.scores$Housekeeping[NP.w.pcs.scores$Chick.ID == "AL345" & NP.w.pcs.scores$Age < 15])
Housekeeping.AV_D = mean(NP.w.pcs.scores$Housekeeping[NP.w.pcs.scores$Chick.ID == "AL333" & NP.w.pcs.scores$Age < 15])
Housekeeping.Koa4 = mean(NP.w.pcs.scores$Housekeeping[NP.w.pcs.scores$Chick.ID == "AL340" & NP.w.pcs.scores$Age < 15])
Housekeeping.Ohia = mean(NP.w.pcs.scores$Housekeeping[NP.w.pcs.scores$Chick.ID == "AL335" & NP.w.pcs.scores$Age < 15])
Housekeeping.Ohia.2 = mean(NP.w.pcs.scores$Housekeeping[NP.w.pcs.scores$Chick.ID == "AL336" & NP.w.pcs.scores$Age < 15])
Housekeeping.Pukiawe = mean(NP.w.pcs.scores$Housekeeping[NP.w.pcs.scores$Chick.ID == "AL338" & NP.w.pcs.scores$Age < 15])
8c.i. create a vector with all of the means for the Housekeeping component
House.means = c(Housekeeping.Aalii, Housekeeping.AV_2, Housekeeping.AV_5, Housekeeping.AV_A, Housekeeping.AV_A.2, Housekeeping.AV_B, Housekeeping.AV_C, Housekeeping.AV_D, Housekeeping.Koa4, Housekeeping.Ohia, Housekeeping.Ohia.2, Housekeeping.Pukiawe)

8d. create a vector with survival values for each chick

Caroline, make sure that these are in the same order as your means vectors

Survive.Chick=c(1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1)
8d.i. and then convert to factor
Survive.Chick = factor(Survive.Chick)

8e. plot survived against avg Attentiveness score per chick

plot(Parent.means~Survive.Chick)

8f. plot survived against avg Housekeeping score per chick

plot(House.means~Survive.Chick)

9. Build the model
model.both = glm(Survive.Chick~House.means + Parent.means, family = binomial(link=logit))
summary(model.both)
## 
## Call:
## glm(formula = Survive.Chick ~ House.means + Parent.means, family = binomial(link = logit))
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.06755  -0.26522  -0.11414   0.00402   1.69056  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)     9.010     13.173   0.684    0.494
## House.means     0.182      2.464   0.074    0.941
## Parent.means   13.486     16.659   0.810    0.418
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 15.2763  on 11  degrees of freedom
## Residual deviance:  4.7517  on  9  degrees of freedom
## AIC: 10.752
## 
## Number of Fisher Scoring iterations: 9
model.parent = glm(Survive.Chick~Parent.means, family = binomial(link=logit))
summary(model.parent)
## 
## Call:
## glm(formula = Survive.Chick ~ Parent.means, family = binomial(link = logit))
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.03320  -0.26394  -0.11263   0.00308   1.69997  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)     9.668     10.747   0.900    0.368
## Parent.means   14.279     14.056   1.016    0.310
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 15.2763  on 11  degrees of freedom
## Residual deviance:  4.7571  on 10  degrees of freedom
## AIC: 8.7571
## 
## Number of Fisher Scoring iterations: 8
model.house = glm(Survive.Chick~House.means, family = binomial(link=logit))
summary(model.house)
## 
## Call:
## glm(formula = Survive.Chick ~ House.means, family = binomial(link = logit))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0334  -0.4176  -0.3618   0.5153   1.2036  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   -1.453      1.023  -1.419   0.1558  
## House.means    3.295      1.739   1.894   0.0582 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 15.2763  on 11  degrees of freedom
## Residual deviance:  8.7819  on 10  degrees of freedom
## AIC: 12.782
## 
## Number of Fisher Scoring iterations: 5

Misc leftovers

M1<-glm(Survive~NP.w.pcs.scores$Parental.Attentive+NP.w.pcs.scores$Housekeeping, family = binomial(link=logit))
summary(M1)
M1

#Deviance not terribly important unless running poisson: Alani removed due to lack of information => Need to average scores for chicks, then run this model#