Data preparation

setDTthreads(threads = 28)

Array2 <- fread("/mnt/data/pgs/ArrayPGSnew.csv")
Array2$Population <- sub("^1KG\\.", "", Array2$Population)

Fst <- fread("/mnt/data/newfiles/merged_new/pairwise_fst_array_hudson_with1kg.fst.summary")
colnames(Fst)[colnames(Fst) == "#POP1"] <- "POP1"

Heatmap of Fst distances

Distance matrix calculation

##   POP1     POP2 EA3_distance NIQ_seb_distance
## 1  ACB  Algeria   0.85669742             1.84
## 2  ACB Amazonia   0.29660255             3.51
## 3  ACB    Andes   0.17054116             3.51
## 4  ACB Armenian   1.37302935             6.14
## 5  ACB      ASW   0.05052031             2.51
## 6  ACB Athabask   0.03542741             8.82

Correlations between Fst and EA3 distances and NIQ distances

## [1] 0.5627015
## [1] 0.3876754

Permutation test

## [1] 0.5627015
## [1] 0

Piffer-Mantel test: linear regression

## 
## Call:
## lm(formula = NIQ_seb_distance ~ EA3_distance + HUDSON_FST, data = merged_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.738  -6.700  -1.204   6.185  29.177 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    5.9917     0.2951   20.31   <2e-16 ***
## EA3_distance   5.3739     0.3179   16.91   <2e-16 ***
## HUDSON_FST    34.8928     3.2973   10.58   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.602 on 2877 degrees of freedom
## Multiple R-squared:  0.2271, Adjusted R-squared:  0.2265 
## F-statistic: 422.6 on 2 and 2877 DF,  p-value: < 2.2e-16
## 
## Call:
## lm(formula = NIQ_seb_distance ~ EA3_distance + HUDSON_FST, data = merged_df)
## 
## Standardized Coefficients::
##  (Intercept) EA3_distance   HUDSON_FST 
##           NA    0.3307847    0.2070447

Moran I for K= 1 to 10

Correlation between PGS and lag PGS for K 1 to 10

Moran I with method of inverse distances

## k = 1 Moran's I = 0.635144 
## k = 2 Moran's I = 0.5968071 
## k = 3 Moran's I = 0.6189198 
## k = 4 Moran's I = 0.6245675 
## k = 5 Moran's I = 0.5871212 
## k = 6 Moran's I = 0.5925706 
## k = 7 Moran's I = 0.5896619 
## k = 8 Moran's I = 0.5728204 
## k = 9 Moran's I = 0.5722084 
## k = 10 Moran's I = 0.5533624

Correlation with method of inverse distances

## k = 1 Correlation = 0.646309 
## k = 2 Correlation = 0.6613765 
## k = 3 Correlation = 0.6768735 
## k = 4 Correlation = 0.6845825 
## k = 5 Correlation = 0.6703679 
## k = 6 Correlation = 0.6802795 
## k = 7 Correlation = 0.6930718 
## k = 8 Correlation = 0.6939943 
## k = 9 Correlation = 0.7223635 
## k = 10 Correlation = 0.7204816

Plot the data for the best model

## `geom_smooth()` using formula = 'y ~ x'

Comoute correlation between NIQ and residuals

## [1] 0.447565
## `geom_smooth()` using formula = 'y ~ x'

Regression with spatial lag computed using K nearest neighbour. K= 1

## 
## Call:
## lm(formula = NIQ_seb ~ EA3 + EA3_lag, data = plot_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.8476  -5.1707   0.4778   5.5304  15.1216 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  88.6104     0.8763 101.123  < 2e-16 ***
## EA3           9.8314     1.6067   6.119 3.08e-08 ***
## EA3_lag       3.1083     1.6146   1.925   0.0577 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.927 on 82 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.5354, Adjusted R-squared:  0.5241 
## F-statistic: 47.25 on 2 and 82 DF,  p-value: 2.242e-14
## 
## Call:
## lm(formula = NIQ_seb ~ EA3 + EA3_lag, data = plot_data)
## 
## Standardized Coefficients::
## (Intercept)         EA3     EA3_lag 
##          NA   0.5974768   0.1879777

Regression with spatial lag computed using inverse distances. K=9

model_niq_invdist <- lm(NIQ_seb ~ EA3 + EA3_lag_inverse, data = plot_data)
summary(model_niq_invdist)
## 
## Call:
## lm(formula = NIQ_seb ~ EA3 + EA3_lag_inverse, data = plot_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.421  -5.209   0.908   5.845  15.829 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       88.367      0.938  94.204  < 2e-16 ***
## EA3                9.884      1.776   5.565 3.21e-07 ***
## EA3_lag_inverse    3.419      2.252   1.518    0.133    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.993 on 82 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.5277, Adjusted R-squared:  0.5162 
## F-statistic:  45.8 on 2 and 82 DF,  p-value: 4.406e-14
# Compute the standardized betas
lm_model_standardized_niq_invdist <- lm.beta(model_niq_invdist)

# Print the summary of the linear regression model with standardized betas
lm_model_standardized_niq_invdist
## 
## Call:
## lm(formula = NIQ_seb ~ EA3 + EA3_lag_inverse, data = plot_data)
## 
## Standardized Coefficients::
##     (Intercept)             EA3 EA3_lag_inverse 
##              NA       0.6006882       0.1638785