library(haven)
library(dplyr)
library(readxl)
library(tictoc)
library(CTT)#sayac
library(psych)
#Bu kod baya işlevsel oldu
# Point the working directory at the folder holding this script so that the
# relative path "SCREEN.sav" below resolves. rstudioapi can only report the
# active document inside an interactive RStudio session with a saved file, so
# in any other setting the working directory is left untouched instead of
# aborting the whole script.
local({
  if (requireNamespace("rstudioapi", quietly = TRUE) &&
      rstudioapi::isAvailable()) {
    script_path <- rstudioapi::getActiveDocumentContext()$path
    # An unsaved document reports an empty path; setwd("") would error.
    if (nzchar(script_path)) {
      setwd(dirname(script_path))
    }
  }
})
# Read the SPSS file and strip the variable labels.
veri <- read_sav("SCREEN.sav")
veri <- expss::drop_var_labs(veri)
# Descriptive statistics for every column except the first one.
describe(veri[, -1])
## vars n mean sd median trimmed mad min max range skew kurtosis
## TIMEDRS 1 465 7.90 10.95 4 5.61 4.45 0 81 81 3.23 12.88
## ATTDRUG 2 465 7.69 1.16 8 7.71 1.48 5 10 5 -0.12 -0.47
## ATTHOUSE 3 464 23.54 4.48 24 23.62 4.45 2 35 33 -0.45 1.51
## INCOME 4 439 4.21 2.42 4 4.01 2.97 1 10 9 0.58 -0.38
## EMPLMNT 5 465 0.47 0.50 0 0.46 0.00 0 1 1 0.12 -1.99
## MSTATUS 6 465 1.78 0.42 2 1.85 0.00 1 2 1 -1.34 -0.21
## RACE 7 465 1.09 0.28 1 1.00 0.00 1 2 1 2.90 6.40
## se
## TIMEDRS 0.51
## ATTDRUG 0.05
## ATTHOUSE 0.21
## INCOME 0.12
## EMPLMNT 0.02
## MSTATUS 0.02
## RACE 0.01
library(DT)
# Interactive table of the data without its first column.
veri %>%
  dplyr::select(-1) %>%
  DT::datatable()
# This package and function produce HTML output; could come in handy.
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 4.4.2
#create_report(veri)
library(naniar)
## Warning: package 'naniar' was built under R version 4.4.2
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Quick overview of missingness across the whole data set.
veri %>% any_na() # TRUE means the data contain missing values
## [1] TRUE
veri %>% n_miss() # number of missing cells
## [1] 27
veri %>% prop_miss() # proportion of missing cells
## [1] 0.007258065
# Number of missing values in each column.
colSums(is.na(veri))
## SUBNO TIMEDRS ATTDRUG ATTHOUSE INCOME EMPLMNT MSTATUS RACE
## 0 0 0 1 26 0 0 0
# Missing count and percentage for each variable.
veri %>% miss_var_summary()
## # A tibble: 8 × 3
## variable n_miss pct_miss
## <chr> <int> <num>
## 1 INCOME 26 5.59
## 2 ATTHOUSE 1 0.215
## 3 SUBNO 0 0
## 4 TIMEDRS 0 0
## 5 ATTDRUG 0 0
## 6 EMPLMNT 0 0
## 7 MSTATUS 0 0
## 8 RACE 0 0
# How many variables share each missing-value count.
veri %>% miss_var_table()
## # A tibble: 3 × 3
## n_miss_in_var n_vars pct_vars
## <int> <int> <dbl>
## 1 0 6 75
## 2 1 1 12.5
## 3 26 1 12.5
# Missing count and percentage for each case (row).
veri %>% miss_case_summary()
## # A tibble: 465 × 3
## case n_miss pct_miss
## <int> <int> <dbl>
## 1 52 1 12.5
## 2 64 1 12.5
## 3 69 1 12.5
## 4 77 1 12.5
## 5 118 1 12.5
## 6 135 1 12.5
## 7 161 1 12.5
## 8 172 1 12.5
## 9 173 1 12.5
## 10 174 1 12.5
## # ℹ 455 more rows
# Graphical views of the missingness.
veri %>% gg_miss_var()
veri %>% vis_miss()
# Listwise deletion preview: the result is only printed, `veri` is unchanged.
veri %>% na.omit()
## # A tibble: 438 × 8
## SUBNO TIMEDRS ATTDRUG ATTHOUSE INCOME EMPLMNT MSTATUS RACE
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 8 27 5 1 2 1
## 2 2 3 7 20 6 0 2 1
## 3 3 0 8 23 3 0 2 1
## 4 4 13 9 28 8 1 2 1
## 5 5 15 7 24 1 1 2 1
## 6 6 3 8 25 4 0 2 1
## 7 7 2 7 30 6 1 2 1
## 8 8 0 7 24 6 1 2 1
## 9 9 7 7 20 2 1 2 1
## 10 10 4 8 30 8 0 1 1
## # ℹ 428 more rows
# TODO: find an example
# Mean imputation: every missing INCOME value is replaced by the mean of the
# observed INCOME values; the original `veri` is left untouched.
veri_mean <- veri
veri_mean$INCOME <- replace(
  veri_mean$INCOME,
  is.na(veri_mean$INCOME),
  mean(veri_mean$INCOME, na.rm = TRUE)
)
# SD before minus SD after imputation; a positive value means the
# variability dropped.
sd(veri$INCOME, na.rm = TRUE) - sd(veri_mean$INCOME, na.rm = TRUE)
## [1] 0.06874715
library(missMethods)
## Warning: package 'missMethods' was built under R version 4.4.2
##
## Attaching package: 'missMethods'
## The following objects are masked from 'package:naniar':
##
## impute_mean, impute_median, impute_mode
# EM-based imputation of the whole data set with the stochastic part
# switched off.
# NOTE(review): every column of `veri`, including the SUBNO identifier, is
# handed to the imputation — confirm that this is intended.
veri_em <- veri %>%
  impute_EM(stochastic = FALSE)
library(mice)
## Warning: package 'mice' was built under R version 4.4.2
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
# Multiple imputation with the "pmm" method: 5 imputed data sets, 50
# iterations, fixed seed so the run can be reproduced.
# NOTE(review): no predictor matrix is supplied, so presumably all columns
# (SUBNO included) act as predictors — confirm against the mice defaults.
imputed_data <- mice(
  data = veri,
  method = "pmm",
  m = 5,
  maxit = 50,
  seed = 500
)
##
## iter imp variable
## 1 1 ATTHOUSE INCOME
## 1 2 ATTHOUSE INCOME
## 1 3 ATTHOUSE INCOME
## 1 4 ATTHOUSE INCOME
## 1 5 ATTHOUSE INCOME
## 2 1 ATTHOUSE INCOME
## 2 2 ATTHOUSE INCOME
## 2 3 ATTHOUSE INCOME
## 2 4 ATTHOUSE INCOME
## 2 5 ATTHOUSE INCOME
## 3 1 ATTHOUSE INCOME
## 3 2 ATTHOUSE INCOME
## 3 3 ATTHOUSE INCOME
## 3 4 ATTHOUSE INCOME
## 3 5 ATTHOUSE INCOME
## 4 1 ATTHOUSE INCOME
## 4 2 ATTHOUSE INCOME
## 4 3 ATTHOUSE INCOME
## 4 4 ATTHOUSE INCOME
## 4 5 ATTHOUSE INCOME
## 5 1 ATTHOUSE INCOME
## 5 2 ATTHOUSE INCOME
## 5 3 ATTHOUSE INCOME
## 5 4 ATTHOUSE INCOME
## 5 5 ATTHOUSE INCOME
## 6 1 ATTHOUSE INCOME
## 6 2 ATTHOUSE INCOME
## 6 3 ATTHOUSE INCOME
## 6 4 ATTHOUSE INCOME
## 6 5 ATTHOUSE INCOME
## 7 1 ATTHOUSE INCOME
## 7 2 ATTHOUSE INCOME
## 7 3 ATTHOUSE INCOME
## 7 4 ATTHOUSE INCOME
## 7 5 ATTHOUSE INCOME
## 8 1 ATTHOUSE INCOME
## 8 2 ATTHOUSE INCOME
## 8 3 ATTHOUSE INCOME
## 8 4 ATTHOUSE INCOME
## 8 5 ATTHOUSE INCOME
## 9 1 ATTHOUSE INCOME
## 9 2 ATTHOUSE INCOME
## 9 3 ATTHOUSE INCOME
## 9 4 ATTHOUSE INCOME
## 9 5 ATTHOUSE INCOME
## 10 1 ATTHOUSE INCOME
## 10 2 ATTHOUSE INCOME
## 10 3 ATTHOUSE INCOME
## 10 4 ATTHOUSE INCOME
## 10 5 ATTHOUSE INCOME
## 11 1 ATTHOUSE INCOME
## 11 2 ATTHOUSE INCOME
## 11 3 ATTHOUSE INCOME
## 11 4 ATTHOUSE INCOME
## 11 5 ATTHOUSE INCOME
## 12 1 ATTHOUSE INCOME
## 12 2 ATTHOUSE INCOME
## 12 3 ATTHOUSE INCOME
## 12 4 ATTHOUSE INCOME
## 12 5 ATTHOUSE INCOME
## 13 1 ATTHOUSE INCOME
## 13 2 ATTHOUSE INCOME
## 13 3 ATTHOUSE INCOME
## 13 4 ATTHOUSE INCOME
## 13 5 ATTHOUSE INCOME
## 14 1 ATTHOUSE INCOME
## 14 2 ATTHOUSE INCOME
## 14 3 ATTHOUSE INCOME
## 14 4 ATTHOUSE INCOME
## 14 5 ATTHOUSE INCOME
## 15 1 ATTHOUSE INCOME
## 15 2 ATTHOUSE INCOME
## 15 3 ATTHOUSE INCOME
## 15 4 ATTHOUSE INCOME
## 15 5 ATTHOUSE INCOME
## 16 1 ATTHOUSE INCOME
## 16 2 ATTHOUSE INCOME
## 16 3 ATTHOUSE INCOME
## 16 4 ATTHOUSE INCOME
## 16 5 ATTHOUSE INCOME
## 17 1 ATTHOUSE INCOME
## 17 2 ATTHOUSE INCOME
## 17 3 ATTHOUSE INCOME
## 17 4 ATTHOUSE INCOME
## 17 5 ATTHOUSE INCOME
## 18 1 ATTHOUSE INCOME
## 18 2 ATTHOUSE INCOME
## 18 3 ATTHOUSE INCOME
## 18 4 ATTHOUSE INCOME
## 18 5 ATTHOUSE INCOME
## 19 1 ATTHOUSE INCOME
## 19 2 ATTHOUSE INCOME
## 19 3 ATTHOUSE INCOME
## 19 4 ATTHOUSE INCOME
## 19 5 ATTHOUSE INCOME
## 20 1 ATTHOUSE INCOME
## 20 2 ATTHOUSE INCOME
## 20 3 ATTHOUSE INCOME
## 20 4 ATTHOUSE INCOME
## 20 5 ATTHOUSE INCOME
## 21 1 ATTHOUSE INCOME
## 21 2 ATTHOUSE INCOME
## 21 3 ATTHOUSE INCOME
## 21 4 ATTHOUSE INCOME
## 21 5 ATTHOUSE INCOME
## 22 1 ATTHOUSE INCOME
## 22 2 ATTHOUSE INCOME
## 22 3 ATTHOUSE INCOME
## 22 4 ATTHOUSE INCOME
## 22 5 ATTHOUSE INCOME
## 23 1 ATTHOUSE INCOME
## 23 2 ATTHOUSE INCOME
## 23 3 ATTHOUSE INCOME
## 23 4 ATTHOUSE INCOME
## 23 5 ATTHOUSE INCOME
## 24 1 ATTHOUSE INCOME
## 24 2 ATTHOUSE INCOME
## 24 3 ATTHOUSE INCOME
## 24 4 ATTHOUSE INCOME
## 24 5 ATTHOUSE INCOME
## 25 1 ATTHOUSE INCOME
## 25 2 ATTHOUSE INCOME
## 25 3 ATTHOUSE INCOME
## 25 4 ATTHOUSE INCOME
## 25 5 ATTHOUSE INCOME
## 26 1 ATTHOUSE INCOME
## 26 2 ATTHOUSE INCOME
## 26 3 ATTHOUSE INCOME
## 26 4 ATTHOUSE INCOME
## 26 5 ATTHOUSE INCOME
## 27 1 ATTHOUSE INCOME
## 27 2 ATTHOUSE INCOME
## 27 3 ATTHOUSE INCOME
## 27 4 ATTHOUSE INCOME
## 27 5 ATTHOUSE INCOME
## 28 1 ATTHOUSE INCOME
## 28 2 ATTHOUSE INCOME
## 28 3 ATTHOUSE INCOME
## 28 4 ATTHOUSE INCOME
## 28 5 ATTHOUSE INCOME
## 29 1 ATTHOUSE INCOME
## 29 2 ATTHOUSE INCOME
## 29 3 ATTHOUSE INCOME
## 29 4 ATTHOUSE INCOME
## 29 5 ATTHOUSE INCOME
## 30 1 ATTHOUSE INCOME
## 30 2 ATTHOUSE INCOME
## 30 3 ATTHOUSE INCOME
## 30 4 ATTHOUSE INCOME
## 30 5 ATTHOUSE INCOME
## 31 1 ATTHOUSE INCOME
## 31 2 ATTHOUSE INCOME
## 31 3 ATTHOUSE INCOME
## 31 4 ATTHOUSE INCOME
## 31 5 ATTHOUSE INCOME
## 32 1 ATTHOUSE INCOME
## 32 2 ATTHOUSE INCOME
## 32 3 ATTHOUSE INCOME
## 32 4 ATTHOUSE INCOME
## 32 5 ATTHOUSE INCOME
## 33 1 ATTHOUSE INCOME
## 33 2 ATTHOUSE INCOME
## 33 3 ATTHOUSE INCOME
## 33 4 ATTHOUSE INCOME
## 33 5 ATTHOUSE INCOME
## 34 1 ATTHOUSE INCOME
## 34 2 ATTHOUSE INCOME
## 34 3 ATTHOUSE INCOME
## 34 4 ATTHOUSE INCOME
## 34 5 ATTHOUSE INCOME
## 35 1 ATTHOUSE INCOME
## 35 2 ATTHOUSE INCOME
## 35 3 ATTHOUSE INCOME
## 35 4 ATTHOUSE INCOME
## 35 5 ATTHOUSE INCOME
## 36 1 ATTHOUSE INCOME
## 36 2 ATTHOUSE INCOME
## 36 3 ATTHOUSE INCOME
## 36 4 ATTHOUSE INCOME
## 36 5 ATTHOUSE INCOME
## 37 1 ATTHOUSE INCOME
## 37 2 ATTHOUSE INCOME
## 37 3 ATTHOUSE INCOME
## 37 4 ATTHOUSE INCOME
## 37 5 ATTHOUSE INCOME
## 38 1 ATTHOUSE INCOME
## 38 2 ATTHOUSE INCOME
## 38 3 ATTHOUSE INCOME
## 38 4 ATTHOUSE INCOME
## 38 5 ATTHOUSE INCOME
## 39 1 ATTHOUSE INCOME
## 39 2 ATTHOUSE INCOME
## 39 3 ATTHOUSE INCOME
## 39 4 ATTHOUSE INCOME
## 39 5 ATTHOUSE INCOME
## 40 1 ATTHOUSE INCOME
## 40 2 ATTHOUSE INCOME
## 40 3 ATTHOUSE INCOME
## 40 4 ATTHOUSE INCOME
## 40 5 ATTHOUSE INCOME
## 41 1 ATTHOUSE INCOME
## 41 2 ATTHOUSE INCOME
## 41 3 ATTHOUSE INCOME
## 41 4 ATTHOUSE INCOME
## 41 5 ATTHOUSE INCOME
## 42 1 ATTHOUSE INCOME
## 42 2 ATTHOUSE INCOME
## 42 3 ATTHOUSE INCOME
## 42 4 ATTHOUSE INCOME
## 42 5 ATTHOUSE INCOME
## 43 1 ATTHOUSE INCOME
## 43 2 ATTHOUSE INCOME
## 43 3 ATTHOUSE INCOME
## 43 4 ATTHOUSE INCOME
## 43 5 ATTHOUSE INCOME
## 44 1 ATTHOUSE INCOME
## 44 2 ATTHOUSE INCOME
## 44 3 ATTHOUSE INCOME
## 44 4 ATTHOUSE INCOME
## 44 5 ATTHOUSE INCOME
## 45 1 ATTHOUSE INCOME
## 45 2 ATTHOUSE INCOME
## 45 3 ATTHOUSE INCOME
## 45 4 ATTHOUSE INCOME
## 45 5 ATTHOUSE INCOME
## 46 1 ATTHOUSE INCOME
## 46 2 ATTHOUSE INCOME
## 46 3 ATTHOUSE INCOME
## 46 4 ATTHOUSE INCOME
## 46 5 ATTHOUSE INCOME
## 47 1 ATTHOUSE INCOME
## 47 2 ATTHOUSE INCOME
## 47 3 ATTHOUSE INCOME
## 47 4 ATTHOUSE INCOME
## 47 5 ATTHOUSE INCOME
## 48 1 ATTHOUSE INCOME
## 48 2 ATTHOUSE INCOME
## 48 3 ATTHOUSE INCOME
## 48 4 ATTHOUSE INCOME
## 48 5 ATTHOUSE INCOME
## 49 1 ATTHOUSE INCOME
## 49 2 ATTHOUSE INCOME
## 49 3 ATTHOUSE INCOME
## 49 4 ATTHOUSE INCOME
## 49 5 ATTHOUSE INCOME
## 50 1 ATTHOUSE INCOME
## 50 2 ATTHOUSE INCOME
## 50 3 ATTHOUSE INCOME
## 50 4 ATTHOUSE INCOME
## 50 5 ATTHOUSE INCOME
# Fit the same regression (TIMEDRS on ATTHOUSE and INCOME) on the imputed
# data held in `imputed_data`.
linreg <- with(imputed_data,lm(TIMEDRS ~ ATTHOUSE + INCOME))
# Combine the per-imputation fits into a single set of pooled estimates.
pooled_results <- mice::pool(linreg)
# Print the pooled coefficient table.
summary(pooled_results)
## term estimate std.error statistic df p.value
## 1 (Intercept) -0.4940885 2.8351124 -0.1742747 458.9825 0.861726397
## 2 ATTHOUSE 0.3139818 0.1125528 2.7896404 459.9387 0.005495781
## 3 INCOME 0.2385298 0.2110460 1.1302265 434.9224 0.259003705
# Reproduce the pooled ATTHOUSE estimate by hand: pull out each of the five
# completed data sets, refit the regression on each, and average the slope.
imp1 <- complete(imputed_data, 1)
imp2 <- complete(imputed_data, 2)
imp3 <- complete(imputed_data, 3)
imp4 <- complete(imputed_data, 4)
imp5 <- complete(imputed_data, 5)

m1 <- lm(TIMEDRS ~ ATTHOUSE + INCOME, data = imp1)
m2 <- lm(TIMEDRS ~ ATTHOUSE + INCOME, data = imp2)
m3 <- lm(TIMEDRS ~ ATTHOUSE + INCOME, data = imp3)
m4 <- lm(TIMEDRS ~ ATTHOUSE + INCOME, data = imp4)
m5 <- lm(TIMEDRS ~ ATTHOUSE + INCOME, data = imp5)

# Mean of the second coefficient (ATTHOUSE) over the five fits.
(m1$coefficients[[2]] +
  m2$coefficients[[2]] +
  m3$coefficients[[2]] +
  m4$coefficients[[2]] +
  m5$coefficients[[2]]) / 5
## [1] 0.3139818
# Combine the results with pool()
library(tuev)
# Liking-mathematics total and achievement score PV1 (author's description).
# LIKE is the sum of the nine items ASBM02A-ASBM02I; because `+` propagates
# NA, a row with any missing item gets a missing LIKE.
data <- TIMSS19_asgturm7 %>%
  select(ASSREA01, starts_with("ASBM02")) %>%
  transmute(
    ASSREA01,
    LIKE = ASBM02A + ASBM02B + ASBM02C + ASBM02D + ASBM02E +
      ASBM02F + ASBM02G + ASBM02H + ASBM02I
  ) %>%
  expss::drop_var_labs()
# Number of missing values in each column.
colSums(is.na(data))
## ASSREA01 LIKE
## 0 270
# How many variables share each missing-value count.
data %>% miss_var_table()
## # A tibble: 2 × 3
## n_miss_in_var n_vars pct_vars
## <int> <int> <dbl>
## 1 0 1 50
## 2 270 1 50
# Plot of missingness per variable.
data %>% gg_miss_var()
# Test whether the missingness is completely at random (MCAR).
data %>% mcar_test()
## # A tibble: 1 × 4
## statistic df p.value missing.patterns
## <dbl> <dbl> <dbl> <int>
## 1 52.1 1 5.17e-13 2
Kayıp veri mekanizmasının MCAR’dan istatistiksel olarak anlamlı düzeyde farklılaştığı belirlenmiştir ($\chi^2(1) = 52.1$, $p = 5.17 \times 10^{-13}$). Matematiği sevme ölçeğinde yer alan maddelerdeki kayıp verinin “okuma başarı puanı (PV1)” değişkeni ile ilişkili olup olmadığını test edelim.
# Is missingness on LIKE related to ASSREA01? Split ASSREA01 by whether LIKE
# is observed and compare the two groups.
data2 <- data
sutun <- colnames(data2)
miss_test <- data2 %>%
  mutate(missing_status = is.na(LIKE))
# ASSREA01 values of the rows where LIKE is observed
COL2NONNA <- miss_test %>%
  filter(!missing_status) %>%
  pull(ASSREA01)
# ASSREA01 values of the rows where LIKE is missing
COL2NA <- miss_test %>%
  filter(missing_status) %>%
  pull(ASSREA01)
# Two-sample t-test: observed-LIKE group vs. missing-LIKE group
t.test(COL2NONNA, y = COL2NA)
##
## Welch Two Sample t-test
##
## data: COL2NONNA and COL2NA
## t = 6.8496, df = 303.7, p-value = 4.118e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 27.92564 50.43871
## sample estimates:
## mean of x mean of y
## 524.9107 485.7286
# Listwise deletion: keep only the complete rows, then describe them.
data_list <- data %>% na.omit()
psych::describe(data_list, fast = TRUE)
## vars n mean sd median min max range skew kurtosis se
## ASSREA01 1 3758 524.91 85.17 533.19 194.81 782.32 587.5 -0.54 0.34 1.39
## LIKE 2 3758 17.30 3.74 16.00 9.00 36.00 27.0 1.60 3.71 0.06
# Mean imputation: missing LIKE values take the mean of the observed ones.
data3 <- data
data3$LIKE <- replace(
  data3$LIKE,
  is.na(data3$LIKE),
  mean(data3$LIKE, na.rm = TRUE)
)
psych::describe(data3, fast = TRUE)
## vars n mean sd median min max range skew kurtosis se
## ASSREA01 1 4028 522.28 86.13 530.96 169.99 782.32 612.33 -0.54 0.33 1.36
## LIKE 2 4028 17.30 3.61 16.00 9.00 36.00 27.00 1.66 4.19 0.06
library(missMethods)
# EM-based imputation with the stochastic part switched off.
data_EM <- data %>%
  impute_EM(stochastic = FALSE)
psych::describe(data_EM, fast = TRUE)
## vars n mean sd median min max range skew kurtosis se
## ASSREA01 1 4028 522.28 86.13 530.96 169.99 782.32 612.33 -0.54 0.33 1.36
## LIKE 2 4028 17.31 3.61 16.00 9.00 36.00 27.00 1.65 4.18 0.06
library(mice)
# Multiple imputation with the "pmm" method: 5 imputed data sets, 5
# iterations, fixed seed so the run can be reproduced.
data_MI <- mice(
  data = data,
  method = "pmm",
  m = 5,
  maxit = 5,
  seed = 59
)
##
## iter imp variable
## 1 1 LIKE
## 1 2 LIKE
## 1 3 LIKE
## 1 4 LIKE
## 1 5 LIKE
## 2 1 LIKE
## 2 2 LIKE
## 2 3 LIKE
## 2 4 LIKE
## 2 5 LIKE
## 3 1 LIKE
## 3 2 LIKE
## 3 3 LIKE
## 3 4 LIKE
## 3 5 LIKE
## 4 1 LIKE
## 4 2 LIKE
## 4 3 LIKE
## 4 4 LIKE
## 4 5 LIKE
## 5 1 LIKE
## 5 2 LIKE
## 5 3 LIKE
## 5 4 LIKE
## 5 5 LIKE
# Regress ASSREA01 on LIKE using the imputed data held in `data_MI`.
fit <- with(data_MI,expr = lm(ASSREA01 ~ LIKE))
# Pool the per-imputation fits and print the combined coefficient table.
summary(pool(fit))
## term estimate std.error statistic df p.value
## 1 (Intercept) 541.011463 6.9029922 78.373471 200.2628 3.112930e-152
## 2 LIKE -1.082272 0.3904842 -2.771614 194.2682 6.120257e-03
# Load the TRUSA data and move CNT, IDSTUD and IDBOOK to the front.
# NOTE(review): the path is absolute and machine-specific; the script will
# not run elsewhere unless the file sits at this exact location.
TRUSA <- readRDS("D:/doktora/OLC733/hafta02/hafta02odev/TRUSA.RDS") %>%
  relocate(CNT, IDSTUD, IDBOOK)
# How many variables share each missing-value count.
TRUSA %>% miss_var_table()
## # A tibble: 1 × 3
## n_miss_in_var n_vars pct_vars
## <int> <int> <dbl>
## 1 0 38 100
# Total score: row sum over columns 4 to 38, i.e. everything after the three
# relocated identifier columns.
TRUSA <- mutate(TRUSA, SUMSCORE = rowSums(TRUSA[, 4:38]))
# Descriptive statistics of the total score for each CNT group.
TRUSA %>%
  group_by(CNT) %>%
  summarise(
    N = n(),
    Mean = mean(SUMSCORE),
    SD = sd(SUMSCORE),
    SKEW = skew(SUMSCORE),
    KURT = kurtosi(SUMSCORE)
  ) %>%
  ungroup()
## # A tibble: 2 × 6
## CNT N Mean SD SKEW KURT
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 TUR 435 13.5 7.57 0.713 -0.614
## 2 USA 716 17.0 7.53 0.0959 -0.896
# Total scores of each country as plain vectors.
tr_toplam <- unlist(select(filter(TRUSA, CNT == "TUR"), SUMSCORE))
usa_toplam <- unlist(select(filter(TRUSA, CNT == "USA"), SUMSCORE))
# Independent-samples t-test assuming equal variances (TUR vs. USA).
t.test(tr_toplam, usa_toplam, paired = FALSE, var.equal = TRUE)
##
## Two Sample t-test
##
## data: tr_toplam and usa_toplam
## t = -7.8348, df = 1149, p-value = 1.064e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.493049 -2.693382
## sample estimates:
## mean of x mean of y
## 13.45287 17.04609
# Create three copies of columns 4-38 with values deleted via delete_MCAR()
# at p = 0.05, 0.10 and 0.15, then re-attach the CNT column to each copy.
# The three deletions run in this order after a single seed, so they share
# one random-number stream.
set.seed(59)
TRUSA_05 <- cbind(delete_MCAR(TRUSA[, 4:38], p = 0.05), CNT = TRUSA$CNT)
TRUSA_10 <- cbind(delete_MCAR(TRUSA[, 4:38], p = 0.10), CNT = TRUSA$CNT)
TRUSA_15 <- cbind(delete_MCAR(TRUSA[, 4:38], p = 0.15), CNT = TRUSA$CNT)
# MCAR test on each of the three data sets with deleted values.
TRUSA_05 %>% mcar_test()
## # A tibble: 1 × 4
## statistic df p.value missing.patterns
## <dbl> <dbl> <dbl> <int>
## 1 3255. 19462 1 586
TRUSA_10 %>% mcar_test()
## # A tibble: 1 × 4
## statistic df p.value missing.patterns
## <dbl> <dbl> <dbl> <int>
## 1 3971. 32648 1 1015
TRUSA_15 %>% mcar_test()
## # A tibble: 1 × 4
## statistic df p.value missing.patterns
## <dbl> <dbl> <dbl> <int>
## 1 8839. 34825 1 1136
# Listwise deletion: drop every row that contains a missing value.
TRUSA_05 <- TRUSA_05 %>% na.omit()
TRUSA_10 <- TRUSA_10 %>% na.omit()
TRUSA_15 <- TRUSA_15 %>% na.omit()

# Total score over the 35 item columns (CNT is column 36).
TRUSA_05 <- mutate(TRUSA_05, SUMSCORE = rowSums(TRUSA_05[, 1:35]))
TRUSA_10 <- mutate(TRUSA_10, SUMSCORE = rowSums(TRUSA_10[, 1:35]))
TRUSA_15 <- mutate(TRUSA_15, SUMSCORE = rowSums(TRUSA_15[, 1:35]))

# Total scores per country for each missing-data rate.
tr_toplam_05 <- unlist(select(filter(TRUSA_05, CNT == "TUR"), SUMSCORE))
usa_toplam_05 <- unlist(select(filter(TRUSA_05, CNT == "USA"), SUMSCORE))
tr_toplam_10 <- unlist(select(filter(TRUSA_10, CNT == "TUR"), SUMSCORE))
usa_toplam_10 <- unlist(select(filter(TRUSA_10, CNT == "USA"), SUMSCORE))
tr_toplam_15 <- unlist(select(filter(TRUSA_15, CNT == "TUR"), SUMSCORE))
usa_toplam_15 <- unlist(select(filter(TRUSA_15, CNT == "USA"), SUMSCORE))

# Equal-variance t-tests: full data, then the 5% and 10% listwise-deleted sets.
asil <- t.test(tr_toplam, usa_toplam, paired = FALSE, var.equal = TRUE)
miss_05 <- t.test(tr_toplam_05, usa_toplam_05, paired = FALSE, var.equal = TRUE)
miss_10 <- t.test(tr_toplam_10, usa_toplam_10, paired = FALSE, var.equal = TRUE)
# Collect the t statistics (the pipe into as.data.frame() is kept on purpose).
t_stat <- c(asil$statistic, miss_05$statistic, miss_10$statistic) %>%
  as.data.frame()
# The 15% test is left disabled.
#miss_15 <- t.test(tr_toplam_15,usa_toplam_15,paired = F,var.equal = T)
Asıl veri setinden elde edilen toplam puanlar ülkeler arasında istatistiksel olarak anlamlı düzeyde farklılaşmakta iken (t(1149) = -7.83, p < .001), %5’lik kayıp veri içeren veri setinde kayıp veri içeren satırlar liste bazlı silindiğinde ülkelerin toplam puanları arasındaki fark istatistiksel olarak anlamlı değildir (t(190) = -1.22, p = 0.22). %10’luk kayıp veri içeren veri seti için liste bazlı silme yapıldığında bulgular %5’lik kayıp verideki ile benzerlik göstermektedir (t(26) = 0.64, p = 0.53). %15’lik kayıp veri oranında ise liste bazlı silme yapıldığında örneklem büyüklüğü t-testi yapmak için yeterli değildir; bir başka deyişle örneklemin önemli düzeyde daraldığı söylenebilir.
# Same deletion scheme as before (same seed, same order), but this time the
# deleted cells are filled with their column means instead of dropping rows.
set.seed(59)
TRUSA_05 <- TRUSA[, 4:38] %>%
  delete_MCAR(p = 0.05) %>%
  missMethods::impute_mean(type = "columnwise")
TRUSA_05 <- cbind(TRUSA_05, CNT = TRUSA$CNT)
TRUSA_10 <- TRUSA[, 4:38] %>%
  delete_MCAR(p = 0.10) %>%
  missMethods::impute_mean(type = "columnwise")
TRUSA_10 <- cbind(TRUSA_10, CNT = TRUSA$CNT)
TRUSA_15 <- TRUSA[, 4:38] %>%
  delete_MCAR(p = 0.15) %>%
  missMethods::impute_mean(type = "columnwise")
TRUSA_15 <- cbind(TRUSA_15, CNT = TRUSA$CNT)

# Total score over the 35 item columns (CNT is column 36).
TRUSA_05 <- mutate(TRUSA_05, SUMSCORE = rowSums(TRUSA_05[, 1:35]))
TRUSA_10 <- mutate(TRUSA_10, SUMSCORE = rowSums(TRUSA_10[, 1:35]))
TRUSA_15 <- mutate(TRUSA_15, SUMSCORE = rowSums(TRUSA_15[, 1:35]))

# Total scores per country for each missing-data rate.
tr_toplam_05 <- unlist(select(filter(TRUSA_05, CNT == "TUR"), SUMSCORE))
usa_toplam_05 <- unlist(select(filter(TRUSA_05, CNT == "USA"), SUMSCORE))
tr_toplam_10 <- unlist(select(filter(TRUSA_10, CNT == "TUR"), SUMSCORE))
usa_toplam_10 <- unlist(select(filter(TRUSA_10, CNT == "USA"), SUMSCORE))
tr_toplam_15 <- unlist(select(filter(TRUSA_15, CNT == "TUR"), SUMSCORE))
usa_toplam_15 <- unlist(select(filter(TRUSA_15, CNT == "USA"), SUMSCORE))

# Equal-variance t-tests: full data, then each mean-imputed data set.
asil <- t.test(tr_toplam, usa_toplam, paired = FALSE, var.equal = TRUE)
miss_05 <- t.test(tr_toplam_05, usa_toplam_05, paired = FALSE, var.equal = TRUE)
miss_10 <- t.test(tr_toplam_10, usa_toplam_10, paired = FALSE, var.equal = TRUE)
miss_15 <- t.test(tr_toplam_15, usa_toplam_15, paired = FALSE, var.equal = TRUE)
# Collect the rounded t statistics (the pipe into as.data.frame() is kept on
# purpose).
t_stat <- c(
  round(asil$statistic, 2),
  round(miss_05$statistic, 2),
  round(miss_10$statistic, 2),
  round(miss_15$statistic, 2)
) %>%
  as.data.frame()
Asıl veri setinden elde edilen toplam puanlar ülkeler arasında istatistiksel olarak anlamlı düzeyde farklılaşmaktadır (t(1149) = -7.83, p < .001). %5’lik kayıp veri içeren veri setinde kayıp veri içeren hücrelere sütun ortalaması atandığında ülkelerin toplam puanları arasındaki fark istatistiksel olarak anlamlı bulunmuştur (t(1149) = -7.75, p < .001). %10’luk kayıp veri içeren veri setinde kayıp veri içeren hücrelere sütun ortalaması atandığında ülkelerin toplam puanları arasındaki fark istatistiksel olarak anlamlı bulunmuştur (t(1149) = -7.6, p < .001). %15’lik kayıp veri içeren veri setinde kayıp veri içeren hücrelere sütun ortalaması atandığında ülkelerin toplam puanları arasındaki fark istatistiksel olarak anlamlı bulunmuştur (t(1149) = -7.6, p < .001). Her ne kadar tüm kayıp veri oranlarında gruplar arası fark istatistiksel olarak anlamlı olsa da t istatistiğinin kayıp veri oranına göre değişkenlik gösterdiği görülmektedir.
Liste bazlı silme ve sütun ortalaması atama yöntemleri karşılaştırıldığında öne çıkan temel farklılık, liste bazlı silme yönteminde istatistiksel analizlerin gücünün büyük oranda düşmesidir. Örneğin %15’lik kayıp veri oranında liste bazlı silme yapıldığında t-testi kestirimi yapılamamıştır. Sütun bazlı ortalama atama yönteminde ise kayıp veri oranı arttıkça kestirilen t değerinin orijinal değerden uzaklaştığı belirlenmiştir. Bu açıdan kayıp veri oranı arttıkça kestirimlerin yanlı olabileceği söylenebilir. Bulgular %5, %10 ve %15’lik kayıp veri oranı ve liste bazlı silme-ortalama atama yöntemleri ile sınırlıdır.
Ödev yaklaşık 30-35 dakika sürdü.