————————————————————

Exploring pers variables

Density plots for all pers variables - CONTROL

library(ggplot2)


AllVard <- unique(melt_persIDcs_c$variable)
for (i in 1:length(AllVard))
{
  Var.i <- AllVard[i]
  dfs.i <- melt_persIDcs_c[melt_persIDcs_c$variable == Var.i, ]
  print(ggplot(data = dfs.i, aes(x = value)) + geom_density(aes(x = value), fill = "grey") + ggtitle(paste(Var.i, "_CTR",sep = "")) + theme(legend.position="none")) 
          
}

Density plots for all pers variables - EXPERIMENT

library(ggplot2)

AllVard1 <- unique(melt_persIDcs_e$variable)
for (i in 1:length(AllVard1))
{
  Var.i <- AllVard1[i]
  dfs.i <- melt_persIDcs_e[melt_persIDcs_e$variable == Var.i, ]
  print(ggplot(data = dfs.i, aes(x = value)) + geom_density(aes(x = value), fill = "grey") +
          ggtitle(paste(Var.i, "_EXP",sep = "")) + theme(legend.position="none")) 
          
  }

Ctr vs exp distributions of the pers variables - KS test

AllVard2c <- unique(melt_persIDcs_c$variable)
AllVard2e <- unique(melt_persIDcs_e$variable)

for (i in 1:length(AllVard2c))
{
  Varc.i <- AllVard2c[i]
  Vare.i <- AllVard2e[i]
  
  dfsc.i <- melt_persIDcs_c[melt_persIDcs_c$variable == Varc.i, ]
  dfse.i <- melt_persIDcs_e[melt_persIDcs_e$variable == Vare.i, ]
  print(Varc.i)
  print(ks.test(dfsc.i$value,   dfse.i$value, alternative = "two.sided"))  
  }

=> Concl ks.test: signif diff in mean_durouts, and mov_feroc

Individual distribution by pers_var CONTROL

AllVar3 <- unique(melt_persIDcs_c$variable)
for (i in 1:length(AllVar3))
{
  Var.i <- AllVar3[i]
  dfs.i <- melt_persIDcs_c[melt_persIDcs_c$variable == Var.i, ]
#   YourFileName <- paste(Var.i, ".jpg", sep = "")
#   jpeg(file = YourFileName)
  print(ggplot(data = dfs.i, aes(x = ID.., y = value)) + geom_boxplot() + geom_point() + ggtitle(paste(Var.i, "_CTR", sep = "")) + theme(legend.position="none"))
  
}

Individual distribution by pers_var EXPERIMENT

AllVar4 <- unique(melt_persIDcs_e$variable)
for (i in 1:length(AllVar4))
{
  Var.i <- AllVar4[i]
  dfs.i <- melt_persIDcs_e[melt_persIDcs_e$variable == Var.i, ]
  print(ggplot(data = dfs.i, aes(x = ID.., y = value)) + geom_boxplot() + geom_point() + ggtitle(paste(Var.i, "_EXP", sep = "")) + theme(legend.position="none"))
  
}

Per individual, all pers_vars CONTROL

ssize_c <- table(melt_persIDcs_c$ID..)/12 # sample size for each ind
ssize_c <-ssize_c [! ssize_c %in% 0] # to get rid of inds with n = 0

melt_persIDcs_co <- melt_persIDcs_c[order(melt_persIDcs_c$ID..),]


AllVar5 <- unique(melt_persIDcs_co$ID..)
for (i in 1:length(AllVar5))
{
  Id.i <-  ssize_c[i]
  Var.i <- AllVar5[i]
  dfs.i <- melt_persIDcs_co[melt_persIDcs_co$ID.. == Var.i, ]
  print(ggplot(data = dfs.i, aes(x = variable, y = value)) + geom_point() + geom_boxplot() + coord_flip() + ggtitle(paste(Var.i, "_CTR", "_N=", ssize_c [i], sep = "")) + theme(legend.position="none"))
  
}

Per individual, all pers_vars EXPERIMENT

ssize_e <- table(melt_persIDcs_e$ID..)/12 # sample size for each ind
ssize_e <-ssize_e [! ssize_e %in% 0] # to get rid of ind with n = 0

melt_persIDcs_eo <- melt_persIDcs_e[order(melt_persIDcs_e$ID..),]

AllVar6 <- unique(melt_persIDcs_eo$ID..)
for (i in 1:length(AllVar6))
{
  Id.i <-  ssize_e[i]
  Var.i <- AllVar6[i]
  dfs.i <- melt_persIDcs_eo[melt_persIDcs_eo$ID.. == Var.i, ]
  print(ggplot(data = dfs.i, aes(x = variable, y = value)) + geom_point() + geom_boxplot() + coord_flip() + ggtitle(paste(Var.i, "_EXP", "_N=", ssize_e [i], sep = "")) + theme(legend.position="none"))
  
}

Testing for interindividual differences CONTROL (only, unsuff n ind tested in exp)

# Inds with n = 1 excluded:
table(melt_persIDcs_c$ID..)/12 # sample size for each ind
melt_persIDcs_cs. <- melt_persIDcs_c[melt_persIDcs_c$ID.. != "241",]
melt_persIDcs_cs <- melt_persIDcs_cs.[melt_persIDcs_cs.$ID.. != "365",] # selected df with ind of n > 1

AllVard7 <- unique(melt_persIDcs_cs$variable)

for (i in 1:length(AllVard7))
{
  Var.i <- AllVard7[i]
  dfs.i <- melt_persIDcs_cs[melt_persIDcs_cs$variable == Var.i, ]
  res.i <- lm(value ~ ID.., data = dfs.i)
  print("######################################################################")
  print(Var.i)
  print(summary(res.i))
  print(anova(res.i))
}

	Var	Nind	Radj	Pval
9	tot_durfor	4	0.38	0.000
6	tot_durins	3	0.26	0.000
3	stan_nouts	2	0.19	0.004
4	stan_nins	2	0.28	0.000
7	mean_durouts	2	0.44	0.000
8	tot_durouts	2	0.38	0.000
1	stan_nflo	1	0.23	0.001
2	stan_nflochang	1	0.09	0.068
5	mean_durins	0	0.11	0.041
10	mov_spead	0	-0.06	0.892
11	mov_feroc	0	0.08	0.093
12	stand_totdist	0	-0.04	0.752

Var - pers ariable; Nind - n individuals distinctly different (p < .05); Radj - R-adjsted of the model; Pval - p-value of the model

Relationships between the pers_var CONTROL (only ctr, unsuff n ind tested in exp)

panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...)
{
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(0, 1, 0, 1))
    r <- cor(x, y)
    txt <- format(c(r, 0.123456789), digits=digits)[1]
    txt <- paste(prefix, txt, sep="")
    if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
    text(0.5, 0.5, txt, cex = cex.cor * r)
}

pairs(persIDcs_m[persIDcs_m$treat == "Ctr",3:14], lower.panel=panel.smooth, upper.panel = panel.cor, pch = 20)

Relationships between the pers_var CONTROL, only vars with >2 n ind distinct in the ANOVA

Clustering individuals - CONTROL - all pers_var - testing various distance and clustering methods

library(pvclust)

persIDcs_mst <- t(persIDcs_m[persIDcs_m == "Ctr" , 3:14])
colnames(persIDcs_mst) <- unique(persIDcs_m[persIDcs_m == "Ctr" ,]$ID..)
persIDcs_mst_sc <- scale(persIDcs_mst)

methd <- c(rep("correlation", 5), rep("euclidean", 5), rep("manhattan", 5))
methcl <- c(rep(c("average", "ward.D", "ward.D2", "complete", "centroid"),3))

for (i in 1:length(methcl)) {
  methd.i <- methd [i]
  methcl.i <- methcl [i]
  fit.i <- pvclust(persIDcs_mst_sc, method.hclust=methcl[i],
               method.dist=methd[i], nboot = 1000, iseed = 20)
  print(plot(fit.i, main = "", hang = -1))
  print(pvrect(fit.i, alpha=.95))
}

Cophenetic correlations - to find the best method of dist and clust

Distance matrices tested (order):: “correlation”, “Euclidean”, “Manhattan”

library(proxy)

persIDcs_ms <- persIDcs_m[persIDcs_m == "Ctr" , 3:14]
colnames(persIDcs_ms) <- unique(persIDcs_m[persIDcs_m == "Ctr" ,]$ID..)
persIDcs_ms_sc <- scale(persIDcs_ms)

dist_cor <- proxy::dist(persIDcs_ms_sc, method = "correlation")
dist_eu <- proxy::dist(persIDcs_ms_sc, method = "Euclidean")
dist_mh <- proxy::dist(persIDcs_ms_sc, method ="Manhattan")


distmax <- rep(list (dist_cor, dist_eu, dist_mh),5)
cl_meth <- c(rep("average",3), rep("ward.D", 3), rep("ward.D2", 3), rep("complete", 3), rep("centroid", 3))

for (i in 1:length(distmax)) {
  hcl.i <- hclust(distmax[[i]], method = cl_meth[i])
  cop.i <- cophenetic(hcl.i)
  cor.i <- cor(distmax[[i]], cop.i)
  print(paste(cl_meth[i], " = ", cor.i, sep = ""))
}

## [1] "average = 0.682043136473381"
## [1] "average = 0.889322809943955"
## [1] "average = 0.89204775972415"
## [1] "ward.D = 0.636515527468485"
## [1] "ward.D = 0.831213328100565"
## [1] "ward.D = 0.83650658250423"
## [1] "ward.D2 = 0.657078067129058"
## [1] "ward.D2 = 0.838353838166757"
## [1] "ward.D2 = 0.846990081374562"
## [1] "complete = 0.670068714308669"
## [1] "complete = 0.819979221201634"
## [1] "complete = 0.831602610148754"
## [1] "centroid = 0.551842982952093"
## [1] "centroid = 0.889043498772771"
## [1] "centroid = 0.882295525059153"

Comp of the two trees with the best cophenetic correlations

-> dist: “Euclidean”; clust: “average” and “centroid”

Testing cohenetic for the two above

## [1] 0.9175938

PCA

library(FactoMineR)
library(factoextra)

# CONTROL
# data and PCA
df.pca<-persIDcs[persIDcs$treat == "Ctr",4:15]
df.pcaID <-persIDcs[persIDcs$treat == "Ctr",1]
df.pcatreat <-persIDcs[persIDcs$treat == "Ctr",2]

res.pca <- PCA(df.pca, graph = F)

# Eigenvalues and scree plot
eigv <- res.pca$eig
fviz_screeplot(res.pca)

# Correlations of variables with pc1 and pc2
fviz_pca_var(res.pca, axes = c(1,2), col.var = "cos2") +
scale_color_gradient2(low = "white", mid = "grey", high = "black", midpoint = 0.5) + theme_minimal()

# Contributions of particular variables into two first PCs 
fviz_pca_contrib(res.pca, choise = "var", axes = 1:2)

# EXPERIMENT
# data and PCA
df.pca.e<-persIDcs[persIDcs$treat == "Exp",4:15]
df.pcaID.e <-persIDcs[persIDcs$treat == "Exp",1]
df.pcatreat.e <-persIDcs[persIDcs$treat == "Exp",2]

res.pca.e <- PCA(df.pca.e, graph = F)

# Eigenvalues and scree plot
eigv.e <- res.pca.e$eig
fviz_screeplot(res.pca.e)

# Correlations of variables with pc1 and pc2
fviz_pca_var(res.pca.e, axes = c(1,2), col.var = "cos2") +
scale_color_gradient2(low = "white", mid = "grey", high = "black", midpoint = 0.5) + theme_minimal()

# Contributions of particular variables into two first PCs 
fviz_pca_contrib(res.pca.e, choise = "var", axes = 1:2)

Reapeted ANOVA on PCs

Results: all tests, p-values > .05

Testing interindvidual differences with PC1 and PC2 CONTROL only

(presented output for pc1 only)

## 
## Call:
## lm(formula = pc1 ~ ID.., data = df.pcdf_c)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2530 -1.0381  0.2307  0.8178  4.5991 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   0.5244     0.6684   0.785  0.43541   
## ID..203      -0.2508     0.9914  -0.253  0.80104   
## ID..241       4.7885     1.7684   2.708  0.00853 **
## ID..292      -1.1746     0.7667  -1.532  0.13010   
## ID..301      -0.6831     0.7909  -0.864  0.39074   
## ID..353      -1.7001     0.9109  -1.866  0.06623 . 
## ID..356       1.0531     0.8629   1.220  0.22646   
## ID..357       4.5131     1.3368   3.376  0.00121 **
## ID..364      -1.6502     0.8309  -1.986  0.05101 . 
## ID..367      -0.1003     1.0568  -0.095  0.92470   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.637 on 69 degrees of freedom
## Multiple R-squared:  0.424,  Adjusted R-squared:  0.3489 
## F-statistic: 5.643 on 9 and 69 DF,  p-value: 7.986e-06

## Analysis of Variance Table
## 
## Response: pc1
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## ID..       9 136.14 15.1270  5.6434 7.986e-06 ***
## Residuals 69 184.95  2.6805                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Personality_data analysis

Kasia Wojczulanis-Jakubas

30/05/2016

————————————————————

Data frames

————————————————————

Exploring pers variables

Density plots for all pers variables - CONTROL

Density plots for all pers variables - EXPERIMENT

Ctr vs exp distributions of the pers variables - KS test

Individual distribution by pers_var CONTROL

Individual distribution by pers_var EXPERIMENT

Per individual, all pers_vars CONTROL

Per individual, all pers_vars EXPERIMENT

Testing for interindividual differences CONTROL (only, unsuff n ind tested in exp)

Relationships between the pers_var CONTROL (only ctr, unsuff n ind tested in exp)

Relationships between the pers_var CONTROL, only vars with >2 n ind distinct in the ANOVA

Clustering individuals - CONTROL - all pers_var - testing various distance and clustering methods

Cophenetic correlations - to find the best method of dist and clust

Comp of the two trees with the best cophenetic correlations

Testing cohenetic for the two above

PCA

Reapeted ANOVA on PCs

Testing interindvidual differences with PC1 and PC2 CONTROL only