Data preparation
setDTthreads(threads = 28)
Array2 <- fread("/mnt/data/pgs/ArrayPGSnew.csv")
Array2$Population <- sub("^1KG\\.", "", Array2$Population)
Fst <- fread("/mnt/data/newfiles/merged_new/pairwise_fst_array_hudson_with1kg.fst.summary")
colnames(Fst)[colnames(Fst) == "#POP1"] <- "POP1"
Heatmap of Fst distances
Distance matrix calculation
## POP1 POP2 EA3_distance NIQ_seb_distance
## 1 ACB Algeria 0.85669742 1.84
## 2 ACB Amazonia 0.29660255 3.51
## 3 ACB Andes 0.17054116 3.51
## 4 ACB Armenian 1.37302935 6.14
## 5 ACB ASW 0.05052031 2.51
## 6 ACB Athabask 0.03542741 8.82
Correlations between Fst and EA3 distances and NIQ distances
## [1] 0.5627015
## [1] 0.3876754
Permutation test
## [1] 0.5627015
## [1] 0
Piffer-Mantel test: linear regression
##
## Call:
## lm(formula = NIQ_seb_distance ~ EA3_distance + HUDSON_FST, data = merged_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.738 -6.700 -1.204 6.185 29.177
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.9917 0.2951 20.31 <2e-16 ***
## EA3_distance 5.3739 0.3179 16.91 <2e-16 ***
## HUDSON_FST 34.8928 3.2973 10.58 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.602 on 2877 degrees of freedom
## Multiple R-squared: 0.2271, Adjusted R-squared: 0.2265
## F-statistic: 422.6 on 2 and 2877 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = NIQ_seb_distance ~ EA3_distance + HUDSON_FST, data = merged_df)
##
## Standardized Coefficients::
## (Intercept) EA3_distance HUDSON_FST
## NA 0.3307847 0.2070447
Moran I for K= 1 to 10
Correlation between PGS and lag PGS for K 1 to 10
Moran I with method of inverse distances
## k = 1 Moran's I = 0.635144
## k = 2 Moran's I = 0.5968071
## k = 3 Moran's I = 0.6189198
## k = 4 Moran's I = 0.6245675
## k = 5 Moran's I = 0.5871212
## k = 6 Moran's I = 0.5925706
## k = 7 Moran's I = 0.5896619
## k = 8 Moran's I = 0.5728204
## k = 9 Moran's I = 0.5722084
## k = 10 Moran's I = 0.5533624
Correlation with method of inverse distances
## k = 1 Correlation = 0.646309
## k = 2 Correlation = 0.6613765
## k = 3 Correlation = 0.6768735
## k = 4 Correlation = 0.6845825
## k = 5 Correlation = 0.6703679
## k = 6 Correlation = 0.6802795
## k = 7 Correlation = 0.6930718
## k = 8 Correlation = 0.6939943
## k = 9 Correlation = 0.7223635
## k = 10 Correlation = 0.7204816
Plot the data for the best model
## `geom_smooth()` using formula = 'y ~ x'
Comoute correlation between NIQ and residuals
## [1] 0.447565
## `geom_smooth()` using formula = 'y ~ x'
Regression with spatial lag computed using K nearest neighbour. K= 1
##
## Call:
## lm(formula = NIQ_seb ~ EA3 + EA3_lag, data = plot_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.8476 -5.1707 0.4778 5.5304 15.1216
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 88.6104 0.8763 101.123 < 2e-16 ***
## EA3 9.8314 1.6067 6.119 3.08e-08 ***
## EA3_lag 3.1083 1.6146 1.925 0.0577 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.927 on 82 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.5354, Adjusted R-squared: 0.5241
## F-statistic: 47.25 on 2 and 82 DF, p-value: 2.242e-14
##
## Call:
## lm(formula = NIQ_seb ~ EA3 + EA3_lag, data = plot_data)
##
## Standardized Coefficients::
## (Intercept) EA3 EA3_lag
## NA 0.5974768 0.1879777
Regression with spatial lag computed using inverse distances. K=9
model_niq_invdist <- lm(NIQ_seb ~ EA3 + EA3_lag_inverse, data = plot_data)
summary(model_niq_invdist)
##
## Call:
## lm(formula = NIQ_seb ~ EA3 + EA3_lag_inverse, data = plot_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.421 -5.209 0.908 5.845 15.829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 88.367 0.938 94.204 < 2e-16 ***
## EA3 9.884 1.776 5.565 3.21e-07 ***
## EA3_lag_inverse 3.419 2.252 1.518 0.133
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.993 on 82 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.5277, Adjusted R-squared: 0.5162
## F-statistic: 45.8 on 2 and 82 DF, p-value: 4.406e-14
# Compute the standardized betas
lm_model_standardized_niq_invdist <- lm.beta(model_niq_invdist)
# Print the summary of the linear regression model with standardized betas
lm_model_standardized_niq_invdist
##
## Call:
## lm(formula = NIQ_seb ~ EA3 + EA3_lag_inverse, data = plot_data)
##
## Standardized Coefficients::
## (Intercept) EA3 EA3_lag_inverse
## NA 0.6006882 0.1638785