Merge / recode
#fix ids
pgs_scores$IID %<>% std_ids()
lahn_vcf$id %<>% std_ids()
meta$Instance_ID %<>% std_ids()
#join
d = full_join(meta, pgs_scores, by = c("Instance_ID" = "IID")) %>%
full_join(lahn_vcf, by = c("Instance_ID" = "id"))
#ensure correct
assert_that(nrow(d) == nrow(meta))
## [1] TRUE
assert_that(!any(duplicated(d$Instance_ID)))
## [1] TRUE
#rename
d$ybp = d$Average_of_95_4pct_date_range_in_calBP_defined_as_1950_CE %>% as.numeric()
## Warning in function_list[[k]](value): NAs introduced by coercion
d$abs_lat = d$Lat %>% as.numeric() %>% abs()
## Warning in function_list[[i]](value): NAs introduced by coercion
d$long = d$Long %>% as.numeric()
## Warning in function_list[[k]](value): NAs introduced by coercion
d$coverage = d$Coverage %>% as.numeric()
## Warning in function_list[[k]](value): NAs introduced by coercion
d$snp_hits = d$SNPs_hit_on_autosomes %>% as.numeric()
d$data_source = d$Skeletal_element %>% plyr::mapvalues(from = c("Tooth", "Petrous"), to = c("tooth", "petrous")) %>% factor()
d$country = d$Country %>% factor()
#recode into broad geographical regions
d$ISO = d$country %>% as.character() %>% plyr::mapvalues(c("..", "Orkney Islands"), c(NA, "Scotland")) %>% pu_translate()
## No exact match: BotswanaOrNamibia
## No exact match: BotswanaOrNamibia
## No exact match: BotswanaOrNamibia
## No exact match: Western Sahara (Morocco)
## No exact match: BotswanaOrNamibia
## No exact match: Western Sahara (Morocco)
## Best fuzzy match found: BotswanaOrNamibia -> Botswana with distance 9.00
## Best fuzzy match found: BotswanaOrNamibia -> Botswana with distance 9.00
## Best fuzzy match found: BotswanaOrNamibia -> Botswana with distance 9.00
## Best fuzzy match found: Western Sahara (Morocco) -> Western Sahara with distance 10.00
## Best fuzzy match found: BotswanaOrNamibia -> Botswana with distance 9.00
## Best fuzzy match found: Western Sahara (Morocco) -> Western Sahara with distance 10.00
#other national data
mega = read_csv("data/Megadataset_v2.0p.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## X1 = col_character(),
## LVnotes = col_character(),
## Names = col_character(),
## Capital = col_character(),
## se_Land = col_character(),
## se_abbrev = col_character(),
## de_Country = col_character(),
## de_abbrev = col_character(),
## de_Country_EN = col_character(),
## de_Western_neighbor = col_logical(),
## de_NW_European = col_logical(),
## dk_Origin = col_character(),
## Country_EN = col_character()
## )
## See spec(...) for full column specifications.
#population data
worldpop = read_html("data/wiki_worldpop.html") %>% html_table() %>% .[[2]] %>% df_legalize_names()
worldpop %<>% mutate(
population2017 = Population_1_July_2017_2 %>% str_replace_all(",", "") %>% as.integer(),
pop = population2017,
log_pop = log10(population2017),
country = str_replace(Country_or_area, "\\[.*", ""),
ISO = pu_translate(country)
) %>% na.omit()
## Warning in function_list[[k]](value): NAs introduced by coercion to integer
## range
## No exact match: World
## No exact match: Guernsey and  Jersey
## Best fuzzy match found: World -> world with distance 1.00
## Best fuzzy match found: Guernsey and  Jersey -> Guernsey (GBR) with distance 11.00
#merge regions into main
d = left_join(d, worldpop %>% select(ISO, UN_continental_region_1, UN_statistical_region_1))
## Joining, by = "ISO"
## Warning: Column `ISO` has different attributes on LHS and RHS of join
d$UN_continental_region_1 = d$UN_continental_region_1 %>% factor() %>% fct_relevel("Europe")
#subsets
#olds
d2 = d %>% filter(ybp > 150)
d2 %>% nrow()
## [1] 2081
#europeans
d2_euro = d2 %>% filter(UN_continental_region_1 == "Europe")
#near modern europeans
d2_NM_euro = d %>% filter(UN_continental_region_1 == "Europe", ybp < 5000, ybp > 100)
Results
Descriptives
nrow(d)
## [1] 5081
#age distribution
d$ybp %>% GG_denhist()
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: Removed 1 rows containing non-finite values (stat_density).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: Removed 1 rows containing non-finite values (stat_density).

d$ybp %>% describe()
#olds
d2$ybp %>% GG_denhist()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

d2$ybp %>% describe()
#regions
(regional_table = d2$UN_continental_region_1 %>% table2())
Simple plots
#plot all
ggplot(d, aes(ybp, CAVIARBF_supl_table_SCORE)) +
geom_point() +
geom_smooth() +
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

#plot -- no moderns
d2 %>%
ggplot(aes(ybp, CAVIARBF_supl_table_SCORE)) +
geom_point() +
geom_smooth() +
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#plot -- no moderns, Europe only
d2_euro %>%
ggplot(aes(ybp, CAVIARBF_supl_table_SCORE)) +
geom_point() +
geom_smooth() +
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#plot -- near moderns, Europe only
d2_NM_euro %>%
ggplot(aes(ybp, CAVIARBF_supl_table_SCORE)) +
geom_point() +
geom_smooth() +
theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Primary models
With causal variants PGS from Lee et al 2018.
#simple
(model_1 = ols(CAVIARBF_supl_table_SCORE ~ ybp, data = d2))
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ ybp, data = d2)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 2081 LR chi2 9.33 R2 0.004
## sigma0.0006 d.f. 1 R2 adj 0.004
## d.f. 2079 Pr(> chi2) 0.0023 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.662e-03 -3.266e-04 -9.835e-05 2.968e-04 3.186e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0001 0.0000 5.48 <0.0001
## ybp 0.0000 0.0000 -3.06 0.0023
##
#latitude
(model_2 = ols(CAVIARBF_supl_table_SCORE ~ ybp + abs_lat, data = d2))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat
## 244
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ ybp + abs_lat, data = d2)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1837 LR chi2 37.01 R2 0.020
## sigma0.0006 d.f. 2 R2 adj 0.019
## d.f. 1834 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.705e-03 -3.268e-04 -7.887e-05 3.018e-04 3.166e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0002 0.0001 -3.37 0.0008
## ybp 0.0000 0.0000 -3.45 0.0006
## abs_lat 0.0000 0.0000 5.29 <0.0001
##
#continental dummies
(model_3 = ols(CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + UN_continental_region_1, data = d2))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat UN_continental_region_1
## 244 9
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + UN_continental_region_1,
## data = d2)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1832 LR chi2 71.47 R2 0.038
## sigma0.0006 d.f. 6 R2 adj 0.035
## d.f. 1825 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.556e-03 -3.315e-04 -8.013e-05 3.016e-04 3.105e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0003 0.0001 2.35 0.0190
## ybp 0.0000 0.0000 -4.19 <0.0001
## abs_lat 0.0000 0.0000 -0.72 0.4744
## UN_continental_region_1=Africa -0.0003 0.0001 -2.11 0.0350
## UN_continental_region_1=Americas -0.0003 0.0001 -4.26 <0.0001
## UN_continental_region_1=Asia -0.0002 0.0000 -3.77 0.0002
## UN_continental_region_1=Oceania -0.0007 0.0001 -5.07 <0.0001
##
#quality
(model_4 = ols(CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + UN_continental_region_1 + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat UN_continental_region_1
## 244 9
## CAVIARBF_supl_table_CNT coverage
## 0 1
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + UN_continental_region_1 +
## CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source, data = d2)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1832 LR chi2 159.50 R2 0.083
## sigma0.0006 d.f. 28 R2 adj 0.069
## d.f. 1803 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.637e-03 -3.128e-04 -3.003e-05 3.012e-04 3.114e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0001 0.0001 0.63 0.5293
## ybp 0.0000 0.0000 -3.90 <0.0001
## abs_lat 0.0000 0.0000 -1.25 0.2102
## UN_continental_region_1=Africa -0.0003 0.0001 -2.14 0.0325
## UN_continental_region_1=Americas -0.0002 0.0001 -3.50 0.0005
## UN_continental_region_1=Asia -0.0001 0.0001 -2.76 0.0059
## UN_continental_region_1=Oceania -0.0007 0.0001 -5.21 <0.0001
## CAVIARBF_supl_table_CNT 0.0000 0.0000 1.06 0.2898
## coverage -0.0012 0.0016 -0.77 0.4405
## coverage' 0.2625 0.3032 0.87 0.3867
## coverage'' -0.3251 0.3766 -0.86 0.3881
## coverage''' 0.0625 0.0745 0.84 0.4012
## snp_hits 0.0000 0.0000 0.85 0.3971
## snp_hits' 0.0000 0.0000 -0.69 0.4908
## snp_hits'' 0.0000 0.0000 0.57 0.5670
## snp_hits''' 0.0000 0.0000 0.16 0.8741
## data_source=bone -0.0001 0.0001 -0.56 0.5756
## data_source=bone (cranial) 0.0001 0.0002 0.65 0.5140
## data_source=bone (long bone) 0.0002 0.0001 2.35 0.0190
## data_source=bone (phalanx) 0.0001 0.0002 0.66 0.5081
## data_source=Hair -0.0009 0.0006 -1.42 0.1564
## data_source=petrous 0.0001 0.0000 1.20 0.2289
## data_source=Rib 0.0002 0.0006 0.34 0.7356
## data_source=tooth 0.0001 0.0000 1.86 0.0626
## data_source=tooth (canine) 0.0004 0.0003 1.38 0.1691
## data_source=tooth (incisor) -0.0001 0.0004 -0.33 0.7450
## data_source=tooth (molar) 0.0001 0.0001 1.32 0.1887
## data_source=tooth (premolar) -0.0001 0.0002 -0.26 0.7938
## data_source=tooth and bone (cranial) 0.0000 0.0006 -0.06 0.9516
##
#nonlinear age
(model_5 = ols(CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) + abs_lat + UN_continental_region_1 + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat UN_continental_region_1
## 244 9
## CAVIARBF_supl_table_CNT coverage
## 0 1
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) + abs_lat +
## UN_continental_region_1 + CAVIARBF_supl_table_CNT + rcs(coverage) +
## rcs(snp_hits) + data_source, data = d2)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1832 LR chi2 167.11 R2 0.087
## sigma0.0006 d.f. 30 R2 adj 0.072
## d.f. 1801 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.679e-03 -3.208e-04 -1.435e-05 3.067e-04 3.071e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0001 0.0002 -0.44 0.6564
## ybp 0.0000 0.0000 2.02 0.0436
## ybp' 0.0000 0.0000 -2.71 0.0068
## ybp'' 0.0000 0.0000 2.74 0.0063
## abs_lat 0.0000 0.0000 -1.18 0.2396
## UN_continental_region_1=Africa -0.0002 0.0001 -1.89 0.0587
## UN_continental_region_1=Americas -0.0002 0.0001 -2.85 0.0045
## UN_continental_region_1=Asia -0.0001 0.0001 -2.50 0.0124
## UN_continental_region_1=Oceania -0.0007 0.0001 -4.93 <0.0001
## CAVIARBF_supl_table_CNT 0.0000 0.0000 1.02 0.3059
## coverage -0.0012 0.0016 -0.76 0.4486
## coverage' 0.2522 0.3028 0.83 0.4051
## coverage'' -0.3120 0.3762 -0.83 0.4071
## coverage''' 0.0595 0.0744 0.80 0.4238
## snp_hits 0.0000 0.0000 0.88 0.3816
## snp_hits' 0.0000 0.0000 -0.72 0.4698
## snp_hits'' 0.0000 0.0000 0.61 0.5388
## snp_hits''' 0.0000 0.0000 0.08 0.9377
## data_source=bone -0.0001 0.0001 -0.73 0.4632
## data_source=bone (cranial) 0.0001 0.0002 0.70 0.4837
## data_source=bone (long bone) 0.0002 0.0001 2.22 0.0266
## data_source=bone (phalanx) 0.0001 0.0002 0.63 0.5320
## data_source=Hair -0.0010 0.0006 -1.57 0.1175
## data_source=petrous 0.0001 0.0001 1.13 0.2575
## data_source=Rib 0.0001 0.0006 0.23 0.8155
## data_source=tooth 0.0001 0.0001 1.44 0.1488
## data_source=tooth (canine) 0.0004 0.0003 1.37 0.1702
## data_source=tooth (incisor) -0.0001 0.0004 -0.29 0.7727
## data_source=tooth (molar) 0.0001 0.0001 1.56 0.1189
## data_source=tooth (premolar) 0.0000 0.0002 -0.09 0.9272
## data_source=tooth and bone (cranial) 0.0000 0.0006 -0.03 0.9735
##
#slope interaction
(model_6 = ols(CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) * UN_continental_region_1 + abs_lat + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2 %>% filter(UN_continental_region_1 %in% regional_table$Group[1:3])))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## UN_continental_region_1 abs_lat
## 0 223
## CAVIARBF_supl_table_CNT coverage
## 0 1
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) * UN_continental_region_1 +
## abs_lat + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source, data = d2 %>% filter(UN_continental_region_1 %in%
## regional_table$Group[1:3]))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1762 LR chi2 165.81 R2 0.090
## sigma0.0006 d.f. 34 R2 adj 0.072
## d.f. 1727 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.646e-03 -3.098e-04 -1.505e-05 3.033e-04 2.990e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0000 0.0002 0.22 0.8283
## ybp 0.0000 0.0000 0.83 0.4062
## ybp' 0.0000 0.0000 -2.06 0.0397
## ybp'' 0.0000 0.0000 2.28 0.0227
## UN_continental_region_1=Americas -0.0002 0.0002 -1.09 0.2742
## UN_continental_region_1=Asia -0.0006 0.0002 -3.22 0.0013
## abs_lat 0.0000 0.0000 -0.96 0.3386
## CAVIARBF_supl_table_CNT 0.0000 0.0000 0.97 0.3344
## coverage -0.0010 0.0019 -0.52 0.6026
## coverage' 0.1987 0.3179 0.63 0.5320
## coverage'' -0.2474 0.3963 -0.62 0.5326
## coverage''' 0.0484 0.0791 0.61 0.5406
## snp_hits 0.0000 0.0000 0.65 0.5141
## snp_hits' 0.0000 0.0000 -0.60 0.5510
## snp_hits'' 0.0000 0.0000 0.50 0.6136
## snp_hits''' 0.0000 0.0000 0.18 0.8599
## data_source=bone -0.0001 0.0001 -0.99 0.3246
## data_source=bone (cranial) 0.0001 0.0002 0.87 0.3838
## data_source=bone (long bone) 0.0002 0.0001 1.98 0.0479
## data_source=bone (phalanx) 0.0000 0.0002 0.23 0.8217
## data_source=Hair -0.0010 0.0006 -1.54 0.1236
## data_source=petrous 0.0000 0.0001 -0.21 0.8342
## data_source=Rib 0.0002 0.0006 0.25 0.7994
## data_source=tooth 0.0000 0.0001 0.76 0.4501
## data_source=tooth (canine) 0.0004 0.0003 1.28 0.2015
## data_source=tooth (incisor) -0.0002 0.0004 -0.49 0.6255
## data_source=tooth (molar) 0.0001 0.0001 1.11 0.2678
## data_source=tooth (premolar) -0.0001 0.0002 -0.23 0.8153
## data_source=tooth and bone (cranial) 0.0000 0.0006 -0.03 0.9738
## ybp * UN_continental_region_1=Americas 0.0000 0.0000 -0.78 0.4362
## ybp' * UN_continental_region_1=Americas 0.0000 0.0000 1.38 0.1679
## ybp'' * UN_continental_region_1=Americas 0.0000 0.0000 -1.53 0.1263
## ybp * UN_continental_region_1=Asia 0.0000 0.0000 1.71 0.0873
## ybp' * UN_continental_region_1=Asia 0.0000 0.0000 -0.49 0.6228
## ybp'' * UN_continental_region_1=Asia 0.0000 0.0000 0.14 0.8850
##
#steps
lrtest(model_1, model_2)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ ybp
## Model 2: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat
##
## L.R. Chisq d.f. P
## 2.77e+01 1.00e+00 1.43e-07
lrtest(model_2, model_3)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat
## Model 2: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + UN_continental_region_1
##
## L.R. Chisq d.f. P
## 3.45e+01 4.00e+00 6.00e-07
lrtest(model_3, model_4)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + UN_continental_region_1
## Model 2: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + UN_continental_region_1 +
## CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source
##
## L.R. Chisq d.f. P
## 8.80e+01 2.20e+01 7.41e-10
lrtest(model_4, model_5)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + UN_continental_region_1 +
## CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source
## Model 2: CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) + abs_lat + UN_continental_region_1 +
## CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source
##
## L.R. Chisq d.f. P
## 7.6100 2.0000 0.0223
lrtest(model_5, model_6)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) + abs_lat + UN_continental_region_1 +
## CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source
## Model 2: CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) * UN_continental_region_1 +
## abs_lat + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source
##
## L.R. Chisq d.f. P
## 1.302 4.000 0.861
#plot expected age pattern
model_preds = data.frame(
ybp = 154:90000, #full range,
abs_lat = d2$abs_lat %>% wtd_mean(), #mean covars
long = d2$long %>% wtd_mean(), #mean covars
CAVIARBF_supl_table_CNT = d2$CAVIARBF_supl_table_CNT %>% wtd_mean(),
coverage = d2$coverage %>% wtd_mean(),
snp_hits = d2$snp_hits %>% wtd_mean(),
data_source = "petrous", #mode
UN_continental_region_1 = "Europe" #mode
)
model_preds$model_1 = predict(model_1, newdata = model_preds)
model_preds$model_2 = predict(model_2, newdata = model_preds)
model_preds$model_3 = predict(model_3, newdata = model_preds)
model_preds$model_4 = predict(model_4, newdata = model_preds)
model_preds$model_5 = predict(model_5, newdata = model_preds)
model_preds$model_6 = predict(model_6, newdata = model_preds)
#plot predictions
model_preds %>%
gather(key = model, value = prediction, starts_with("model_")) %>%
ggplot(aes(ybp, prediction, color = model)) +
geom_line() +
theme_bw()

#for each continent
#plot expected age pattern by continent
model_preds2 = data.frame(
ybp = 154:90000, #full range,
abs_lat = d2$abs_lat %>% wtd_mean(), #mean covars
long = d2$long %>% wtd_mean(), #mean covars
CAVIARBF_supl_table_CNT = d2$CAVIARBF_supl_table_CNT %>% wtd_mean(),
coverage = d2$coverage %>% wtd_mean(),
snp_hits = d2$snp_hits %>% wtd_mean(),
data_source = "petrous", #mode
UN_continental_region_1 = rep(c("Europe", "Asia", "Americas"),
length.out = length(154:90000))
)
model_preds2$model_6 = predict(model_6, newdata = model_preds2)
#plot predictions
model_preds2 %>%
gather(key = model, value = prediction, starts_with("model_")) %>%
ggplot(aes(ybp, prediction, color = UN_continental_region_1)) +
geom_line() +
theme_bw()

The nonlinear slope * continent model was not significant, so redisregard the weird predictions of that.
Alternative polygenic score: MTAG education lead snps
Lead SNPs MTAG, same source.
#simple
(model_1 = ols(mtag_edu_lead_snps_gwas_SCORE ~ ybp, data = d2))
## Linear Regression Model
##
## ols(formula = mtag_edu_lead_snps_gwas_SCORE ~ ybp, data = d2)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 2081 LR chi2 60.92 R2 0.029
## sigma0.0002 d.f. 1 R2 adj 0.028
## d.f. 2079 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -8.943e-04 -8.423e-05 -6.122e-06 8.322e-05 7.214e-04
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0001 0.0000 12.22 <0.0001
## ybp 0.0000 0.0000 -7.86 <0.0001
##
#latitude
(model_2 = ols(mtag_edu_lead_snps_gwas_SCORE ~ ybp + abs_lat, data = d2))
## Frequencies of Missing Values Due to Each Variable
## mtag_edu_lead_snps_gwas_SCORE ybp
## 0 0
## abs_lat
## 244
##
## Linear Regression Model
##
## ols(formula = mtag_edu_lead_snps_gwas_SCORE ~ ybp + abs_lat,
## data = d2)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1837 LR chi2 70.86 R2 0.038
## sigma0.0002 d.f. 2 R2 adj 0.037
## d.f. 1834 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -7.106e-04 -8.574e-05 -4.582e-06 8.147e-05 7.081e-04
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0000 0.0000 0.85 0.3938
## ybp 0.0000 0.0000 -8.13 <0.0001
## abs_lat 0.0000 0.0000 3.05 0.0023
##
#continental dummies
(model_3 = ols(mtag_edu_lead_snps_gwas_SCORE ~ ybp + abs_lat + UN_continental_region_1, data = d2))
## Frequencies of Missing Values Due to Each Variable
## mtag_edu_lead_snps_gwas_SCORE ybp
## 0 0
## abs_lat UN_continental_region_1
## 244 9
##
## Linear Regression Model
##
## ols(formula = mtag_edu_lead_snps_gwas_SCORE ~ ybp + abs_lat +
## UN_continental_region_1, data = d2)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1832 LR chi2 100.89 R2 0.054
## sigma0.0002 d.f. 6 R2 adj 0.050
## d.f. 1825 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -7.097e-04 -8.663e-05 -5.698e-06 8.340e-05 7.137e-04
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0000 0.0000 0.99 0.3242
## ybp 0.0000 0.0000 -7.86 <0.0001
## abs_lat 0.0000 0.0000 1.26 0.2067
## UN_continental_region_1=Africa -0.0001 0.0000 -3.78 0.0002
## UN_continental_region_1=Americas 0.0000 0.0000 1.58 0.1136
## UN_continental_region_1=Asia 0.0000 0.0000 1.29 0.1983
## UN_continental_region_1=Oceania -0.0001 0.0000 -1.47 0.1417
##
#quality
(model_4 = ols(mtag_edu_lead_snps_gwas_SCORE ~ ybp + abs_lat + UN_continental_region_1 + mtag_edu_lead_snps_gwas_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2))
## Frequencies of Missing Values Due to Each Variable
## mtag_edu_lead_snps_gwas_SCORE ybp
## 0 0
## abs_lat UN_continental_region_1
## 244 9
## mtag_edu_lead_snps_gwas_CNT coverage
## 0 1
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = mtag_edu_lead_snps_gwas_SCORE ~ ybp + abs_lat +
## UN_continental_region_1 + mtag_edu_lead_snps_gwas_CNT + rcs(coverage) +
## rcs(snp_hits) + data_source, data = d2)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1832 LR chi2 131.81 R2 0.069
## sigma0.0002 d.f. 28 R2 adj 0.055
## d.f. 1803 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -7.001e-04 -8.540e-05 -1.404e-06 8.660e-05 7.084e-04
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0001 0.0000 1.40 0.1624
## ybp 0.0000 0.0000 -6.78 <0.0001
## abs_lat 0.0000 0.0000 0.34 0.7329
## UN_continental_region_1=Africa -0.0001 0.0000 -4.23 <0.0001
## UN_continental_region_1=Americas 0.0000 0.0000 1.04 0.2985
## UN_continental_region_1=Asia 0.0000 0.0000 0.56 0.5722
## UN_continental_region_1=Oceania -0.0001 0.0000 -1.98 0.0480
## mtag_edu_lead_snps_gwas_CNT 0.0000 0.0000 -0.59 0.5546
## coverage -0.0004 0.0004 -0.90 0.3682
## coverage' 0.0928 0.0809 1.15 0.2512
## coverage'' -0.1166 0.1005 -1.16 0.2459
## coverage''' 0.0245 0.0199 1.23 0.2183
## snp_hits 0.0000 0.0000 1.12 0.2613
## snp_hits' 0.0000 0.0000 -1.08 0.2820
## snp_hits'' 0.0000 0.0000 1.01 0.3127
## snp_hits''' 0.0000 0.0000 -0.35 0.7301
## data_source=bone 0.0000 0.0000 -0.35 0.7267
## data_source=bone (cranial) 0.0000 0.0000 0.89 0.3759
## data_source=bone (long bone) 0.0000 0.0000 1.14 0.2530
## data_source=bone (phalanx) 0.0000 0.0000 -0.44 0.6571
## data_source=Hair 0.0004 0.0002 2.20 0.0280
## data_source=petrous 0.0000 0.0000 -0.44 0.6566
## data_source=Rib 0.0000 0.0002 -0.12 0.9017
## data_source=tooth 0.0000 0.0000 -0.71 0.4798
## data_source=tooth (canine) -0.0001 0.0001 -1.17 0.2425
## data_source=tooth (incisor) -0.0003 0.0001 -3.13 0.0018
## data_source=tooth (molar) 0.0000 0.0000 -1.46 0.1435
## data_source=tooth (premolar) -0.0001 0.0001 -1.51 0.1319
## data_source=tooth and bone (cranial) -0.0001 0.0002 -0.61 0.5413
##
#nonlinear age
(model_5 = ols(mtag_edu_lead_snps_gwas_SCORE ~ rcs(ybp, 4) + abs_lat + UN_continental_region_1 + mtag_edu_lead_snps_gwas_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2))
## Frequencies of Missing Values Due to Each Variable
## mtag_edu_lead_snps_gwas_SCORE ybp
## 0 0
## abs_lat UN_continental_region_1
## 244 9
## mtag_edu_lead_snps_gwas_CNT coverage
## 0 1
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = mtag_edu_lead_snps_gwas_SCORE ~ rcs(ybp, 4) + abs_lat +
## UN_continental_region_1 + mtag_edu_lead_snps_gwas_CNT + rcs(coverage) +
## rcs(snp_hits) + data_source, data = d2)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1832 LR chi2 173.16 R2 0.090
## sigma0.0002 d.f. 30 R2 adj 0.075
## d.f. 1801 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -6.674e-04 -8.373e-05 -2.669e-06 8.780e-05 6.822e-04
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0000 0.0000 0.16 0.8700
## ybp 0.0000 0.0000 2.61 0.0092
## ybp' 0.0000 0.0000 -4.97 <0.0001
## ybp'' 0.0000 0.0000 5.35 <0.0001
## abs_lat 0.0000 0.0000 0.30 0.7653
## UN_continental_region_1=Africa -0.0001 0.0000 -4.01 <0.0001
## UN_continental_region_1=Americas 0.0000 0.0000 1.45 0.1463
## UN_continental_region_1=Asia 0.0000 0.0000 0.90 0.3704
## UN_continental_region_1=Oceania -0.0001 0.0000 -2.42 0.0156
## mtag_edu_lead_snps_gwas_CNT 0.0000 0.0000 -0.62 0.5332
## coverage -0.0004 0.0004 -0.97 0.3327
## coverage' 0.0939 0.0800 1.17 0.2409
## coverage'' -0.1176 0.0994 -1.18 0.2369
## coverage''' 0.0243 0.0197 1.23 0.2170
## snp_hits 0.0000 0.0000 1.27 0.2048
## snp_hits' 0.0000 0.0000 -1.19 0.2335
## snp_hits'' 0.0000 0.0000 1.12 0.2619
## snp_hits''' 0.0000 0.0000 -0.42 0.6774
## data_source=bone 0.0000 0.0000 -0.24 0.8083
## data_source=bone (cranial) 0.0000 0.0000 1.16 0.2447
## data_source=bone (long bone) 0.0000 0.0000 1.55 0.1217
## data_source=bone (phalanx) 0.0000 0.0000 -0.57 0.5715
## data_source=Hair 0.0003 0.0002 2.10 0.0359
## data_source=petrous 0.0000 0.0000 0.57 0.5657
## data_source=Rib 0.0000 0.0002 -0.18 0.8561
## data_source=tooth 0.0000 0.0000 -0.34 0.7321
## data_source=tooth (canine) -0.0001 0.0001 -0.91 0.3606
## data_source=tooth (incisor) -0.0003 0.0001 -2.87 0.0041
## data_source=tooth (molar) 0.0000 0.0000 0.26 0.7923
## data_source=tooth (premolar) 0.0000 0.0001 -0.84 0.4021
## data_source=tooth and bone (cranial) -0.0001 0.0002 -0.43 0.6655
##
#slope interaction
(model_6 = ols(mtag_edu_lead_snps_gwas_SCORE ~ rcs(ybp, 4) * UN_continental_region_1 + abs_lat + mtag_edu_lead_snps_gwas_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2 %>% filter(UN_continental_region_1 %in% regional_table$Group[1:3])))
## Frequencies of Missing Values Due to Each Variable
## mtag_edu_lead_snps_gwas_SCORE ybp
## 0 0
## UN_continental_region_1 abs_lat
## 0 223
## mtag_edu_lead_snps_gwas_CNT coverage
## 0 1
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = mtag_edu_lead_snps_gwas_SCORE ~ rcs(ybp, 4) * UN_continental_region_1 +
## abs_lat + mtag_edu_lead_snps_gwas_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source, data = d2 %>% filter(UN_continental_region_1 %in%
## regional_table$Group[1:3]))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1762 LR chi2 182.22 R2 0.098
## sigma0.0002 d.f. 34 R2 adj 0.080
## d.f. 1727 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -6.505e-04 -8.482e-05 -4.012e-06 9.009e-05 6.642e-04
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0001 0.0000 1.34 0.1808
## ybp 0.0000 0.0000 0.96 0.3360
## ybp' 0.0000 0.0000 -4.10 <0.0001
## ybp'' 0.0000 0.0000 4.78 <0.0001
## UN_continental_region_1=Americas 0.0000 0.0000 -1.11 0.2688
## UN_continental_region_1=Asia 0.0000 0.0000 0.26 0.7981
## abs_lat 0.0000 0.0000 -0.19 0.8520
## mtag_edu_lead_snps_gwas_CNT 0.0000 0.0000 -0.57 0.5661
## coverage -0.0003 0.0005 -0.70 0.4863
## coverage' 0.0749 0.0830 0.90 0.3675
## coverage'' -0.0944 0.1035 -0.91 0.3619
## coverage''' 0.0200 0.0207 0.97 0.3322
## snp_hits 0.0000 0.0000 0.95 0.3422
## snp_hits' 0.0000 0.0000 -0.95 0.3402
## snp_hits'' 0.0000 0.0000 0.92 0.3597
## snp_hits''' 0.0000 0.0000 -0.36 0.7195
## data_source=bone 0.0000 0.0000 -0.39 0.6997
## data_source=bone (cranial) 0.0001 0.0000 1.75 0.0799
## data_source=bone (long bone) 0.0001 0.0000 2.45 0.0142
## data_source=bone (phalanx) 0.0000 0.0000 -0.77 0.4425
## data_source=Hair 0.0004 0.0002 2.16 0.0313
## data_source=petrous 0.0000 0.0000 0.11 0.9100
## data_source=Rib 0.0000 0.0002 -0.12 0.9051
## data_source=tooth 0.0000 0.0000 0.28 0.7786
## data_source=tooth (canine) -0.0001 0.0001 -0.78 0.4382
## data_source=tooth (incisor) -0.0003 0.0001 -2.91 0.0037
## data_source=tooth (molar) 0.0000 0.0000 1.12 0.2638
## data_source=tooth (premolar) 0.0000 0.0001 -0.56 0.5764
## data_source=tooth and bone (cranial) -0.0001 0.0002 -0.33 0.7434
## ybp * UN_continental_region_1=Americas 0.0000 0.0000 0.68 0.4937
## ybp' * UN_continental_region_1=Americas 0.0000 0.0000 -0.02 0.9840
## ybp'' * UN_continental_region_1=Americas 0.0000 0.0000 0.04 0.9704
## ybp * UN_continental_region_1=Asia 0.0000 0.0000 -0.91 0.3624
## ybp' * UN_continental_region_1=Asia 0.0000 0.0000 1.52 0.1297
## ybp'' * UN_continental_region_1=Asia 0.0000 0.0000 -1.46 0.1457
##
#steps
lrtest(model_1, model_2)
##
## Model 1: mtag_edu_lead_snps_gwas_SCORE ~ ybp
## Model 2: mtag_edu_lead_snps_gwas_SCORE ~ ybp + abs_lat
##
## L.R. Chisq d.f. P
## 9.93230 1.00000 0.00162
lrtest(model_2, model_3)
##
## Model 1: mtag_edu_lead_snps_gwas_SCORE ~ ybp + abs_lat
## Model 2: mtag_edu_lead_snps_gwas_SCORE ~ ybp + abs_lat + UN_continental_region_1
##
## L.R. Chisq d.f. P
## 3.00e+01 4.00e+00 4.83e-06
lrtest(model_3, model_4)
##
## Model 1: mtag_edu_lead_snps_gwas_SCORE ~ ybp + abs_lat + UN_continental_region_1
## Model 2: mtag_edu_lead_snps_gwas_SCORE ~ ybp + abs_lat + UN_continental_region_1 +
## mtag_edu_lead_snps_gwas_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source
##
## L.R. Chisq d.f. P
## 30.9243 22.0000 0.0977
lrtest(model_4, model_5)
##
## Model 1: mtag_edu_lead_snps_gwas_SCORE ~ ybp + abs_lat + UN_continental_region_1 +
## mtag_edu_lead_snps_gwas_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source
## Model 2: mtag_edu_lead_snps_gwas_SCORE ~ rcs(ybp, 4) + abs_lat + UN_continental_region_1 +
## mtag_edu_lead_snps_gwas_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source
##
## L.R. Chisq d.f. P
## 4.13e+01 2.00e+00 1.05e-09
lrtest(model_5, model_6)
##
## Model 1: mtag_edu_lead_snps_gwas_SCORE ~ rcs(ybp, 4) + abs_lat + UN_continental_region_1 +
## mtag_edu_lead_snps_gwas_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source
## Model 2: mtag_edu_lead_snps_gwas_SCORE ~ rcs(ybp, 4) * UN_continental_region_1 +
## abs_lat + mtag_edu_lead_snps_gwas_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source
##
## L.R. Chisq d.f. P
## 9.0637 4.0000 0.0595
#plot expected age pattern
model_preds = data.frame(
ybp = 154:90000, #full range,
abs_lat = d2$abs_lat %>% wtd_mean(), #mean covars
long = d2$long %>% wtd_mean(), #mean covars
mtag_edu_lead_snps_gwas_CNT = d2$mtag_edu_lead_snps_gwas_CNT %>% wtd_mean(),
coverage = d2$coverage %>% wtd_mean(),
snp_hits = d2$snp_hits %>% wtd_mean(),
data_source = "petrous", #mode
UN_continental_region_1 = "Europe" #mode
)
model_preds$model_1 = predict(model_1, newdata = model_preds)
model_preds$model_2 = predict(model_2, newdata = model_preds)
model_preds$model_3 = predict(model_3, newdata = model_preds)
model_preds$model_4 = predict(model_4, newdata = model_preds)
model_preds$model_5 = predict(model_5, newdata = model_preds)
model_preds$model_6 = predict(model_6, newdata = model_preds)
#plot predictions
model_preds %>%
gather(key = model, value = prediction, starts_with("model_")) %>%
ggplot(aes(ybp, prediction, color = model)) +
geom_line() +
theme_bw()

#for each continent
#plot expected age pattern by continent
model_preds2 = data.frame(
ybp = 154:90000, #full range,
abs_lat = d2$abs_lat %>% wtd_mean(), #mean covars
long = d2$long %>% wtd_mean(), #mean covars
mtag_edu_lead_snps_gwas_CNT = d2$mtag_edu_lead_snps_gwas_CNT %>% wtd_mean(),
coverage = d2$coverage %>% wtd_mean(),
snp_hits = d2$snp_hits %>% wtd_mean(),
data_source = "petrous", #mode
UN_continental_region_1 = rep(c("Europe", "Asia", "Americas"),
length.out = length(154:90000))
)
model_preds2$model_6 = predict(model_6, newdata = model_preds2)
#plot predictions
model_preds2 %>%
gather(key = model, value = prediction, starts_with("model_")) %>%
ggplot(aes(ybp, prediction, color = UN_continental_region_1)) +
geom_line() +
theme_bw()

Europeans only
#simple
(model_1 = ols(CAVIARBF_supl_table_SCORE ~ ybp, data = d2_euro))
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ ybp, data = d2_euro)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1487 LR chi2 22.39 R2 0.015
## sigma0.0006 d.f. 1 R2 adj 0.014
## d.f. 1485 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.0023969 -0.0003313 -0.0001163 0.0002944 0.0031026
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0002 0.0000 8.94 <0.0001
## ybp 0.0000 0.0000 -4.75 <0.0001
##
#latitude
(model_2 = ols(CAVIARBF_supl_table_SCORE ~ ybp + abs_lat, data = d2_euro))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat
## 173
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ ybp + abs_lat, data = d2_euro)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1314 LR chi2 20.33 R2 0.015
## sigma0.0006 d.f. 2 R2 adj 0.014
## d.f. 1311 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.0023899 -0.0003348 -0.0001147 0.0003025 0.0031084
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0001 0.0002 0.75 0.4563
## ybp 0.0000 0.0000 -4.49 <0.0001
## abs_lat 0.0000 0.0000 0.41 0.6855
##
#continental dummies
(model_3 = ols(CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long, data = d2_euro))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat long
## 173 175
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long,
## data = d2_euro)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1312 LR chi2 23.17 R2 0.018
## sigma0.0006 d.f. 3 R2 adj 0.015
## d.f. 1308 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.0023854 -0.0003366 -0.0001112 0.0003086 0.0031245
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0001 0.0002 0.65 0.5168
## ybp 0.0000 0.0000 -4.20 <0.0001
## abs_lat 0.0000 0.0000 0.62 0.5377
## long 0.0000 0.0000 -1.71 0.0880
##
#quality
(model_4 = ols(CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2_euro))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat long
## 173 175
## CAVIARBF_supl_table_CNT coverage
## 0 0
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long +
## CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source, data = d2_euro)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1312 LR chi2 117.46 R2 0.086
## sigma0.0006 d.f. 23 R2 adj 0.069
## d.f. 1288 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.575e-03 -3.140e-04 -1.855e-05 2.951e-04 2.919e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0001 0.0002 0.41 0.6787
## ybp 0.0000 0.0000 -4.32 <0.0001
## abs_lat 0.0000 0.0000 -0.75 0.4529
## long 0.0000 0.0000 -2.01 0.0451
## CAVIARBF_supl_table_CNT 0.0000 0.0000 0.63 0.5301
## coverage -0.0035 0.0021 -1.64 0.1016
## coverage' 0.5190 0.2939 1.77 0.0776
## coverage'' -0.6431 0.3636 -1.77 0.0772
## coverage''' 0.1249 0.0703 1.78 0.0757
## snp_hits 0.0000 0.0000 1.81 0.0702
## snp_hits' 0.0000 0.0000 -1.55 0.1221
## snp_hits'' 0.0000 0.0000 1.38 0.1687
## snp_hits''' 0.0000 0.0000 0.14 0.8892
## data_source=bone -0.0001 0.0001 -0.81 0.4199
## data_source=bone (cranial) 0.0002 0.0002 1.34 0.1808
## data_source=bone (long bone) 0.0002 0.0001 1.82 0.0688
## data_source=bone (phalanx) 0.0001 0.0002 0.45 0.6527
## data_source=petrous -0.0001 0.0001 -1.03 0.3012
## data_source=tooth 0.0000 0.0001 0.63 0.5290
## data_source=tooth (canine) 0.0003 0.0004 0.91 0.3646
## data_source=tooth (incisor) -0.0001 0.0004 -0.33 0.7436
## data_source=tooth (molar) 0.0000 0.0001 0.29 0.7750
## data_source=tooth (premolar) -0.0002 0.0003 -0.84 0.4019
## data_source=tooth and bone (cranial) 0.0000 0.0006 -0.03 0.9775
##
#nonlinear age
(model_5 = ols(CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) + abs_lat + long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2_euro))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat long
## 173 175
## CAVIARBF_supl_table_CNT coverage
## 0 0
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) + abs_lat +
## long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source, data = d2_euro)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1312 LR chi2 126.10 R2 0.092
## sigma0.0006 d.f. 25 R2 adj 0.074
## d.f. 1286 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.562e-03 -3.205e-04 -2.609e-05 3.014e-04 2.876e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0001 0.0002 0.31 0.7537
## ybp 0.0000 0.0000 0.67 0.5051
## ybp' 0.0000 0.0000 -2.02 0.0441
## ybp'' 0.0000 0.0000 2.25 0.0248
## abs_lat 0.0000 0.0000 -1.05 0.2924
## long 0.0000 0.0000 -1.81 0.0712
## CAVIARBF_supl_table_CNT 0.0000 0.0000 0.56 0.5747
## coverage -0.0036 0.0021 -1.70 0.0900
## coverage' 0.5306 0.2932 1.81 0.0706
## coverage'' -0.6570 0.3628 -1.81 0.0704
## coverage''' 0.1271 0.0701 1.81 0.0702
## snp_hits 0.0000 0.0000 1.90 0.0580
## snp_hits' 0.0000 0.0000 -1.63 0.1033
## snp_hits'' 0.0000 0.0000 1.47 0.1430
## snp_hits''' 0.0000 0.0000 0.04 0.9662
## data_source=bone -0.0001 0.0001 -0.72 0.4735
## data_source=bone (cranial) 0.0003 0.0002 1.55 0.1216
## data_source=bone (long bone) 0.0002 0.0001 1.98 0.0482
## data_source=bone (phalanx) 0.0001 0.0002 0.43 0.6669
## data_source=petrous 0.0000 0.0001 -0.70 0.4845
## data_source=tooth 0.0000 0.0001 0.70 0.4842
## data_source=tooth (canine) 0.0004 0.0004 1.06 0.2885
## data_source=tooth (incisor) -0.0001 0.0004 -0.26 0.7917
## data_source=tooth (molar) 0.0001 0.0001 1.01 0.3146
## data_source=tooth (premolar) -0.0001 0.0003 -0.48 0.6292
## data_source=tooth and bone (cranial) 0.0000 0.0006 0.06 0.9523
##
(model_5b = ols(CAVIARBF_supl_table_SCORE ~ rcs(ybp, 3) + abs_lat + long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2_euro))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat long
## 173 175
## CAVIARBF_supl_table_CNT coverage
## 0 0
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ rcs(ybp, 3) + abs_lat +
## long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source, data = d2_euro)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1312 LR chi2 119.31 R2 0.087
## sigma0.0006 d.f. 24 R2 adj 0.070
## d.f. 1287 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.562e-03 -3.145e-04 -1.742e-05 2.939e-04 2.929e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0002 0.0002 0.84 0.4014
## ybp 0.0000 0.0000 -1.97 0.0489
## ybp' 0.0000 0.0000 1.35 0.1783
## abs_lat 0.0000 0.0000 -0.85 0.3959
## long 0.0000 0.0000 -1.99 0.0464
## CAVIARBF_supl_table_CNT 0.0000 0.0000 0.56 0.5754
## coverage -0.0035 0.0021 -1.62 0.1053
## coverage' 0.5138 0.2938 1.75 0.0806
## coverage'' -0.6366 0.3635 -1.75 0.0802
## coverage''' 0.1235 0.0703 1.76 0.0791
## snp_hits 0.0000 0.0000 1.80 0.0717
## snp_hits' 0.0000 0.0000 -1.53 0.1268
## snp_hits'' 0.0000 0.0000 1.35 0.1761
## snp_hits''' 0.0000 0.0000 0.17 0.8629
## data_source=bone 0.0000 0.0001 -0.44 0.6634
## data_source=bone (cranial) 0.0003 0.0002 1.51 0.1306
## data_source=bone (long bone) 0.0002 0.0001 2.14 0.0322
## data_source=bone (phalanx) 0.0001 0.0002 0.59 0.5538
## data_source=petrous 0.0000 0.0001 -0.57 0.5664
## data_source=tooth 0.0001 0.0001 1.15 0.2522
## data_source=tooth (canine) 0.0004 0.0004 1.03 0.3025
## data_source=tooth (incisor) -0.0001 0.0004 -0.24 0.8134
## data_source=tooth (molar) 0.0001 0.0001 0.81 0.4164
## data_source=tooth (premolar) -0.0002 0.0003 -0.64 0.5202
## data_source=tooth and bone (cranial) 0.0000 0.0006 0.06 0.9558
##
(model_5c = ols(CAVIARBF_supl_table_SCORE ~ rcs(ybp, 5) + abs_lat + long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2_euro))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat long
## 173 175
## CAVIARBF_supl_table_CNT coverage
## 0 0
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ rcs(ybp, 5) + abs_lat +
## long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source, data = d2_euro)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1312 LR chi2 129.51 R2 0.094
## sigma0.0006 d.f. 26 R2 adj 0.076
## d.f. 1285 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.543e-03 -3.156e-04 -2.376e-05 3.074e-04 2.819e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0001 0.0002 -0.30 0.7637
## ybp 0.0000 0.0000 1.82 0.0689
## ybp' 0.0000 0.0000 -2.35 0.0187
## ybp'' 0.0000 0.0000 2.03 0.0427
## ybp''' 0.0000 0.0000 -1.94 0.0530
## abs_lat 0.0000 0.0000 -0.97 0.3343
## long 0.0000 0.0000 -2.08 0.0380
## CAVIARBF_supl_table_CNT 0.0000 0.0000 0.51 0.6103
## coverage -0.0037 0.0021 -1.72 0.0859
## coverage' 0.5345 0.2930 1.82 0.0683
## coverage'' -0.6618 0.3625 -1.83 0.0682
## coverage''' 0.1279 0.0701 1.83 0.0682
## snp_hits 0.0000 0.0000 1.93 0.0543
## snp_hits' 0.0000 0.0000 -1.65 0.0983
## snp_hits'' 0.0000 0.0000 1.49 0.1354
## snp_hits''' 0.0000 0.0000 -0.01 0.9919
## data_source=bone -0.0001 0.0001 -0.72 0.4727
## data_source=bone (cranial) 0.0003 0.0002 1.53 0.1268
## data_source=bone (long bone) 0.0002 0.0001 1.95 0.0517
## data_source=bone (phalanx) 0.0001 0.0002 0.43 0.6690
## data_source=petrous 0.0000 0.0001 -0.69 0.4890
## data_source=tooth 0.0000 0.0001 0.73 0.4633
## data_source=tooth (canine) 0.0004 0.0004 1.05 0.2959
## data_source=tooth (incisor) -0.0001 0.0004 -0.29 0.7754
## data_source=tooth (molar) 0.0001 0.0001 1.01 0.3120
## data_source=tooth (premolar) -0.0001 0.0003 -0.53 0.5956
## data_source=tooth and bone (cranial) 0.0001 0.0006 0.12 0.9055
##
#steps
lrtest(model_1, model_2)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ ybp
## Model 2: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat
##
## L.R. Chisq d.f. P
## 2.064 1.000 0.151
lrtest(model_2, model_3)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat
## Model 2: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long
##
## L.R. Chisq d.f. P
## 2.8374 1.0000 0.0921
lrtest(model_3, model_4)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long
## Model 2: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long + CAVIARBF_supl_table_CNT +
## rcs(coverage) + rcs(snp_hits) + data_source
##
## L.R. Chisq d.f. P
## 9.43e+01 2.00e+01 1.30e-11
lrtest(model_4, model_5)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long + CAVIARBF_supl_table_CNT +
## rcs(coverage) + rcs(snp_hits) + data_source
## Model 2: CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) + abs_lat + long + CAVIARBF_supl_table_CNT +
## rcs(coverage) + rcs(snp_hits) + data_source
##
## L.R. Chisq d.f. P
## 8.6422 2.0000 0.0133
#plot expected age pattern
model_preds = data.frame(
ybp = 154:90000, #full range,
abs_lat = d2_euro$abs_lat %>% wtd_mean(), #mean covars
long = d2_euro$long %>% wtd_mean(), #mean covars
CAVIARBF_supl_table_CNT = d2_euro$CAVIARBF_supl_table_CNT %>% wtd_mean(),
coverage = d2_euro$coverage %>% wtd_mean(),
snp_hits = d2_euro$snp_hits %>% wtd_mean(),
data_source = "petrous", #mode
UN_continental_region_1 = "Europe" #mode
)
model_preds$model_1 = predict(model_1, newdata = model_preds)
model_preds$model_2 = predict(model_2, newdata = model_preds)
model_preds$model_3 = predict(model_3, newdata = model_preds)
model_preds$model_4 = predict(model_4, newdata = model_preds)
model_preds$model_5 = predict(model_5, newdata = model_preds)
model_preds$model_5b = predict(model_5b, newdata = model_preds)
model_preds$model_5c = predict(model_5c, newdata = model_preds)
#plot predictions
model_preds %>%
gather(key = model, value = prediction, starts_with("model_")) %>%
ggplot(aes(ybp, prediction, color = model)) +
geom_line() +
theme_bw()

So the shape of the function varies depending on the number of knots used for the spline. Tricky.
Near-moden Europeans only
#simple
(model_1 = ols(CAVIARBF_supl_table_SCORE ~ ybp, data = d2_NM_euro))
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ ybp, data = d2_NM_euro)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 889 LR chi2 0.14 R2 0.000
## sigma0.0007 d.f. 1 R2 adj -0.001
## d.f. 887 Pr(> chi2) 0.7084 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.0023940 -0.0003477 -0.0001420 0.0003343 0.0031047
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0001 0.0001 2.25 0.0250
## ybp 0.0000 0.0000 0.37 0.7088
##
#latitude
(model_2 = ols(CAVIARBF_supl_table_SCORE ~ ybp + abs_lat, data = d2_NM_euro))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat
## 130
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ ybp + abs_lat, data = d2_NM_euro)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 759 LR chi2 0.49 R2 0.001
## sigma0.0007 d.f. 2 R2 adj -0.002
## d.f. 756 Pr(> chi2) 0.7814 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.0023770 -0.0003565 -0.0001416 0.0003745 0.0030829
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0002 0.0003 0.86 0.3884
## ybp 0.0000 0.0000 -0.69 0.4884
## abs_lat 0.0000 0.0000 0.03 0.9747
##
#continental dummies
(model_3 = ols(CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long, data = d2_NM_euro))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat long
## 130 132
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long,
## data = d2_NM_euro)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 757 LR chi2 4.85 R2 0.006
## sigma0.0007 d.f. 3 R2 adj 0.002
## d.f. 753 Pr(> chi2) 0.1834 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.0023793 -0.0003443 -0.0001395 0.0003485 0.0031042
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0002 0.0003 0.74 0.4617
## ybp 0.0000 0.0000 -0.89 0.3716
## abs_lat 0.0000 0.0000 0.38 0.7028
## long 0.0000 0.0000 -2.05 0.0412
##
#quality
(model_4 = ols(CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2_NM_euro))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat long
## 130 132
## CAVIARBF_supl_table_CNT coverage
## 0 0
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long +
## CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source, data = d2_NM_euro)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 757 LR chi2 98.01 R2 0.121
## sigma0.0006 d.f. 21 R2 adj 0.096
## d.f. 735 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.426e-03 -3.315e-04 -9.681e-06 3.035e-04 2.968e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0001 0.0003 0.44 0.6607
## ybp 0.0000 0.0000 0.09 0.9318
## abs_lat 0.0000 0.0000 -0.70 0.4849
## long 0.0000 0.0000 -2.35 0.0192
## CAVIARBF_supl_table_CNT 0.0000 0.0000 0.64 0.5230
## coverage -0.0037 0.0033 -1.11 0.2694
## coverage' 0.6875 0.5214 1.32 0.1877
## coverage'' -0.8819 0.6621 -1.33 0.1833
## coverage''' 0.1992 0.1421 1.40 0.1615
## snp_hits 0.0000 0.0000 1.23 0.2201
## snp_hits' 0.0000 0.0000 -1.34 0.1811
## snp_hits'' 0.0000 0.0000 1.34 0.1809
## snp_hits''' 0.0000 0.0000 -1.16 0.2453
## data_source=bone -0.0002 0.0001 -1.58 0.1137
## data_source=bone (cranial) 0.0009 0.0006 1.49 0.1379
## data_source=bone (long bone) 0.0002 0.0001 1.39 0.1652
## data_source=bone (phalanx) -0.0002 0.0003 -0.57 0.5721
## data_source=petrous -0.0001 0.0001 -1.43 0.1525
## data_source=tooth 0.0000 0.0001 -0.46 0.6440
## data_source=tooth (canine) 0.0008 0.0006 1.27 0.2033
## data_source=tooth (incisor) -0.0001 0.0006 -0.10 0.9178
## data_source=tooth (molar) -0.0002 0.0002 -1.13 0.2586
##
#nonlinear age
(model_5 = ols(CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) + abs_lat + long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2_NM_euro))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat long
## 130 132
## CAVIARBF_supl_table_CNT coverage
## 0 0
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) + abs_lat +
## long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source, data = d2_NM_euro)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 757 LR chi2 104.50 R2 0.129
## sigma0.0006 d.f. 23 R2 adj 0.102
## d.f. 733 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.421e-03 -3.283e-04 -1.485e-05 3.067e-04 2.839e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0001 0.0003 -0.38 0.7039
## ybp 0.0000 0.0000 1.90 0.0580
## ybp' 0.0000 0.0000 -1.55 0.1219
## ybp'' 0.0000 0.0000 0.51 0.6136
## abs_lat 0.0000 0.0000 -0.79 0.4326
## long 0.0000 0.0000 -2.64 0.0084
## CAVIARBF_supl_table_CNT 0.0000 0.0000 0.63 0.5271
## coverage -0.0040 0.0033 -1.19 0.2343
## coverage' 0.7199 0.5206 1.38 0.1671
## coverage'' -0.9223 0.6611 -1.40 0.1634
## coverage''' 0.2068 0.1419 1.46 0.1453
## snp_hits 0.0000 0.0000 1.32 0.1870
## snp_hits' 0.0000 0.0000 -1.43 0.1532
## snp_hits'' 0.0000 0.0000 1.44 0.1510
## snp_hits''' 0.0000 0.0000 -1.28 0.2010
## data_source=bone -0.0002 0.0001 -1.70 0.0900
## data_source=bone (cranial) 0.0008 0.0006 1.32 0.1883
## data_source=bone (long bone) 0.0002 0.0001 1.37 0.1705
## data_source=bone (phalanx) -0.0002 0.0003 -0.67 0.5024
## data_source=petrous -0.0001 0.0001 -1.56 0.1199
## data_source=tooth 0.0000 0.0001 -0.56 0.5750
## data_source=tooth (canine) 0.0008 0.0006 1.29 0.1963
## data_source=tooth (incisor) -0.0001 0.0006 -0.12 0.9059
## data_source=tooth (molar) -0.0002 0.0002 -1.11 0.2689
##
(model_5b = ols(CAVIARBF_supl_table_SCORE ~ rcs(ybp, 3) + abs_lat + long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2_NM_euro))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat long
## 130 132
## CAVIARBF_supl_table_CNT coverage
## 0 0
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ rcs(ybp, 3) + abs_lat +
## long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source, data = d2_NM_euro)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 757 LR chi2 104.19 R2 0.129
## sigma0.0006 d.f. 22 R2 adj 0.102
## d.f. 734 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.422e-03 -3.284e-04 -1.245e-05 3.039e-04 2.857e-03
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0000 0.0003 -0.17 0.8687
## ybp 0.0000 0.0000 2.21 0.0277
## ybp' 0.0000 0.0000 -2.45 0.0144
## abs_lat 0.0000 0.0000 -0.83 0.4049
## long 0.0000 0.0000 -2.60 0.0096
## CAVIARBF_supl_table_CNT 0.0000 0.0000 0.61 0.5404
## coverage -0.0039 0.0033 -1.17 0.2443
## coverage' 0.7063 0.5197 1.36 0.1745
## coverage'' -0.9052 0.6600 -1.37 0.1706
## coverage''' 0.2033 0.1416 1.44 0.1515
## snp_hits 0.0000 0.0000 1.30 0.1951
## snp_hits' 0.0000 0.0000 -1.41 0.1599
## snp_hits'' 0.0000 0.0000 1.42 0.1572
## snp_hits''' 0.0000 0.0000 -1.27 0.2033
## data_source=bone -0.0002 0.0001 -1.75 0.0804
## data_source=bone (cranial) 0.0009 0.0006 1.34 0.1794
## data_source=bone (long bone) 0.0002 0.0001 1.39 0.1645
## data_source=bone (phalanx) -0.0002 0.0003 -0.70 0.4844
## data_source=petrous -0.0001 0.0001 -1.61 0.1083
## data_source=tooth -0.0001 0.0001 -0.65 0.5153
## data_source=tooth (canine) 0.0008 0.0006 1.28 0.2025
## data_source=tooth (incisor) -0.0001 0.0006 -0.14 0.8869
## data_source=tooth (molar) -0.0002 0.0002 -1.16 0.2470
##
(model_5c = ols(CAVIARBF_supl_table_SCORE ~ rcs(ybp, 5) + abs_lat + long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) + data_source, data = d2_NM_euro))
## Frequencies of Missing Values Due to Each Variable
## CAVIARBF_supl_table_SCORE ybp
## 0 0
## abs_lat long
## 130 132
## CAVIARBF_supl_table_CNT coverage
## 0 0
## snp_hits data_source
## 0 0
##
## Linear Regression Model
##
## ols(formula = CAVIARBF_supl_table_SCORE ~ rcs(ybp, 5) + abs_lat +
## long + CAVIARBF_supl_table_CNT + rcs(coverage) + rcs(snp_hits) +
## data_source, data = d2_NM_euro)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 757 LR chi2 104.60 R2 0.129
## sigma0.0006 d.f. 24 R2 adj 0.100
## d.f. 732 Pr(> chi2) 0.0000 g 0.000
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.0024292 -0.0003276 -0.0000175 0.0003047 0.0028262
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0002 0.0003 -0.48 0.6336
## ybp 0.0000 0.0000 1.58 0.1148
## ybp' 0.0000 0.0000 -0.96 0.3368
## ybp'' 0.0000 0.0000 0.39 0.6963
## ybp''' 0.0000 0.0000 -0.12 0.9006
## abs_lat 0.0000 0.0000 -0.72 0.4693
## long 0.0000 0.0000 -2.66 0.0081
## CAVIARBF_supl_table_CNT 0.0000 0.0000 0.64 0.5211
## coverage -0.0040 0.0033 -1.19 0.2342
## coverage' 0.7215 0.5210 1.38 0.1665
## coverage'' -0.9244 0.6616 -1.40 0.1628
## coverage''' 0.2074 0.1420 1.46 0.1446
## snp_hits 0.0000 0.0000 1.32 0.1868
## snp_hits' 0.0000 0.0000 -1.44 0.1514
## snp_hits'' 0.0000 0.0000 1.44 0.1492
## snp_hits''' 0.0000 0.0000 -1.28 0.1995
## data_source=bone -0.0002 0.0001 -1.67 0.0959
## data_source=bone (cranial) 0.0008 0.0006 1.31 0.1891
## data_source=bone (long bone) 0.0002 0.0001 1.40 0.1631
## data_source=bone (phalanx) -0.0002 0.0003 -0.67 0.5023
## data_source=petrous -0.0001 0.0001 -1.55 0.1220
## data_source=tooth 0.0000 0.0001 -0.56 0.5759
## data_source=tooth (canine) 0.0008 0.0006 1.29 0.1985
## data_source=tooth (incisor) -0.0001 0.0006 -0.12 0.9051
## data_source=tooth (molar) -0.0002 0.0002 -1.11 0.2687
##
#steps
lrtest(model_1, model_2)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ ybp
## Model 2: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat
##
## L.R. Chisq d.f. P
## 0.353 1.000 0.552
lrtest(model_2, model_3)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat
## Model 2: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long
##
## L.R. Chisq d.f. P
## 4.353 1.000 0.037
lrtest(model_3, model_4)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long
## Model 2: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long + CAVIARBF_supl_table_CNT +
## rcs(coverage) + rcs(snp_hits) + data_source
##
## L.R. Chisq d.f. P
## 9.32e+01 1.80e+01 3.89e-12
lrtest(model_4, model_5)
##
## Model 1: CAVIARBF_supl_table_SCORE ~ ybp + abs_lat + long + CAVIARBF_supl_table_CNT +
## rcs(coverage) + rcs(snp_hits) + data_source
## Model 2: CAVIARBF_supl_table_SCORE ~ rcs(ybp, 4) + abs_lat + long + CAVIARBF_supl_table_CNT +
## rcs(coverage) + rcs(snp_hits) + data_source
##
## L.R. Chisq d.f. P
## 6.487 2.000 0.039
#plot expected age pattern
model_preds = data.frame(
ybp = 0:5000, #full range,
abs_lat = d2_euro$abs_lat %>% wtd_mean(), #mean covars
long = d2_euro$long %>% wtd_mean(), #mean covars
CAVIARBF_supl_table_CNT = d2_euro$CAVIARBF_supl_table_CNT %>% wtd_mean(),
coverage = d2_euro$coverage %>% wtd_mean(),
snp_hits = d2_euro$snp_hits %>% wtd_mean(),
data_source = "petrous", #mode
UN_continental_region_1 = "Europe" #mode
)
model_preds$model_1 = predict(model_1, newdata = model_preds)
model_preds$model_2 = predict(model_2, newdata = model_preds)
model_preds$model_3 = predict(model_3, newdata = model_preds)
model_preds$model_4 = predict(model_4, newdata = model_preds)
model_preds$model_5 = predict(model_5, newdata = model_preds)
model_preds$model_5b = predict(model_5b, newdata = model_preds)
model_preds$model_5c = predict(model_5c, newdata = model_preds)
#plot predictions
model_preds %>%
gather(key = model, value = prediction, starts_with("model_")) %>%
ggplot(aes(ybp, prediction, color = model)) +
geom_line() +
theme_bw()

#plot
Polygenic score correlations
#all data
wtd.cors(d %>% select(ends_with("SCORE")))
## CAVIARBF_supl_table_SCORE
## CAVIARBF_supl_table_SCORE 1.00000
## GWAS_EA_excl23andMe_SCORE -0.00928
## MTAG_EA.to10K_SCORE 0.15447
## mtag_edu_lead_snps_gwas_SCORE 0.20368
## GWAS_EA_excl23andMe_SCORE
## CAVIARBF_supl_table_SCORE -0.00928
## GWAS_EA_excl23andMe_SCORE 1.00000
## MTAG_EA.to10K_SCORE 0.57631
## mtag_edu_lead_snps_gwas_SCORE 0.59114
## MTAG_EA.to10K_SCORE
## CAVIARBF_supl_table_SCORE 0.154
## GWAS_EA_excl23andMe_SCORE 0.576
## MTAG_EA.to10K_SCORE 1.000
## mtag_edu_lead_snps_gwas_SCORE 0.713
## mtag_edu_lead_snps_gwas_SCORE
## CAVIARBF_supl_table_SCORE 0.204
## GWAS_EA_excl23andMe_SCORE 0.591
## MTAG_EA.to10K_SCORE 0.713
## mtag_edu_lead_snps_gwas_SCORE 1.000
#old data
wtd.cors(d2 %>% select(ends_with("SCORE")))
## CAVIARBF_supl_table_SCORE
## CAVIARBF_supl_table_SCORE 1.0000
## GWAS_EA_excl23andMe_SCORE 0.0668
## MTAG_EA.to10K_SCORE 0.1871
## mtag_edu_lead_snps_gwas_SCORE 0.2564
## GWAS_EA_excl23andMe_SCORE
## CAVIARBF_supl_table_SCORE 0.0668
## GWAS_EA_excl23andMe_SCORE 1.0000
## MTAG_EA.to10K_SCORE 0.4409
## mtag_edu_lead_snps_gwas_SCORE 0.4038
## MTAG_EA.to10K_SCORE
## CAVIARBF_supl_table_SCORE 0.187
## GWAS_EA_excl23andMe_SCORE 0.441
## MTAG_EA.to10K_SCORE 1.000
## mtag_edu_lead_snps_gwas_SCORE 0.646
## mtag_edu_lead_snps_gwas_SCORE
## CAVIARBF_supl_table_SCORE 0.256
## GWAS_EA_excl23andMe_SCORE 0.404
## MTAG_EA.to10K_SCORE 0.646
## mtag_edu_lead_snps_gwas_SCORE 1.000
#new data
wtd.cors(d %>% filter(ybp <= 150) %>% select(ends_with("SCORE")))
## CAVIARBF_supl_table_SCORE
## CAVIARBF_supl_table_SCORE 1.0000
## GWAS_EA_excl23andMe_SCORE -0.0145
## MTAG_EA.to10K_SCORE 0.1532
## mtag_edu_lead_snps_gwas_SCORE 0.1873
## GWAS_EA_excl23andMe_SCORE
## CAVIARBF_supl_table_SCORE -0.0145
## GWAS_EA_excl23andMe_SCORE 1.0000
## MTAG_EA.to10K_SCORE 0.6051
## mtag_edu_lead_snps_gwas_SCORE 0.6273
## MTAG_EA.to10K_SCORE
## CAVIARBF_supl_table_SCORE 0.153
## GWAS_EA_excl23andMe_SCORE 0.605
## MTAG_EA.to10K_SCORE 1.000
## mtag_edu_lead_snps_gwas_SCORE 0.731
## mtag_edu_lead_snps_gwas_SCORE
## CAVIARBF_supl_table_SCORE 0.187
## GWAS_EA_excl23andMe_SCORE 0.627
## MTAG_EA.to10K_SCORE 0.731
## mtag_edu_lead_snps_gwas_SCORE 1.000