library(brolgar) #example data set for wages
Warning: replacing previous import ‘lifecycle::last_warnings’ by ‘rlang::last_warnings’ when loading ‘tibble’Warning: replacing previous import ‘lifecycle::last_warnings’ by ‘rlang::last_warnings’ when loading ‘pillar’
library(tidyverse)
library(plotly)
library(fdapace) #functional data anlaysis
head(wages)
wages %>%
select(ln_wages,xp) %>%
summary()
ln_wages xp id
Min. :0.708 Min. : 0.001 Min. : 31
1st Qu.:1.591 1st Qu.: 1.609 1st Qu.: 3194
Median :1.842 Median : 3.451 Median : 6582
Mean :1.897 Mean : 3.957 Mean : 6301
3rd Qu.:2.140 3rd Qu.: 5.949 3rd Qu.: 9300
Max. :4.304 Max. :12.700 Max. :12543
Browse over longitudinal data graphically and analytically in R
wages_t <- as_tsibble(wages,
key=id,
index=xp,
regular=FALSE)
num_obs <- wages_t %>% features(ln_wages,n_obs)
summary(num_obs$n_obs)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 5.000 8.000 7.209 9.000 13.000
We filter out subjects with less than 3 observations
df <- wages_t %>%
add_n_obs() %>%
filter(n_obs > 3)
set.seed(123)
df %>%
sample_n_keys(size=10) %>%
ggplot(aes(x=xp,y=ln_wages,group=id,color=id)) +
geom_line() +
theme_bw()+
labs(x='experience',y='log_wage')+
theme(legend.position='none')
df %>%
ggplot(aes(x=xp,y=ln_wages,group=id,color=id)) +
geom_line()+
facet_sample(n_per_facet=3,n_facets=20)+
theme_bw()+
theme(legend.position = "none")
df2 <- df %>%
select(id,xp,ln_wages) %>%
as_tibble()
uid <- unique(df2$id)
N <- length(uid)
Wages <- numeric(N)
Exp <- numeric(N)
for(k in 1:N) {
Wages[k] = df2 %>%
filter(id==uid[k]) %>%
select(ln_wages) %>%
pull() %>%
list()
Exp[k] = df2 %>%
filter(id==uid[k]) %>%
select(xp) %>%
pull() %>%
list()
}
df3 <- tibble(uid,Wages,Exp)
glimpse(df3)
Rows: 764
Columns: 3
$ uid <int> 31, 36, 53, 122, 134, 145, 155, 173, 207, 222, 223, 226, 234, 241, 248, 253, 295, 2…
$ Wages <list> [<1.491, 1.433, 1.469, 1.749, 1.931, 1.709, 2.086, 2.129>, <1.982, 1.798, 2.256, 2…
$ Exp <list> [<0.015, 0.715, 1.734, 2.773, 3.927, 4.946, 5.965, 6.984>, <0.315, 0.983, 2.040, 3…
wage_input <- MakeFPCAInputs(IDs=df2$id,tVec=df2$xp,yVec=df2$ln_wages)
res_wages <- FPCA(Ly=wage_input$Ly,Lt=wage_input$Lt)
plot(res_wages)
res_wages$cumFVE %>%
round(3)
[1] 0.827 0.940 0.971 0.995
CreatePathPlot(res_wages,subset=1:2,main="Estimated Paths for IDs 31 and 36")
lines(x={df2 %>%
filter(id==31) %>%
select(xp) %>% pull()},
y={df2 %>%
filter(id==31) %>%
select(ln_wages) %>%
pull()},col='blue')
lines(x={df2 %>%
filter(id==36) %>%
select(xp) %>% pull()},
y={df2 %>%
filter(id==36) %>%
select(ln_wages) %>%
pull()},col='blue')
NA