VERİ TEMİZLEME
## [1] "/Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 2.Yarıyıl/OLC731_R ile İleri İstatistik Uygulamaları_Doç. Dr. Kübra Atalay Kabasakal /1.Hafta 16.02.2026"
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## vars n mean sd median trimmed mad min max range
## SINIF 1 6890 9.84 0.46 10.00 9.89 0.00 7.00 12.00 5.00
## CINSIYET 2 6890 1.51 0.50 2.00 1.51 0.00 1.00 2.00 1.00
## Anne_Egitim 3 6835 2.66 1.95 2.00 2.57 1.48 0.00 6.00 6.00
## Baba_Egitim 4 6833 3.12 1.94 2.00 3.08 1.48 0.00 6.00 6.00
## OKUMA_ZEVK 5 6821 0.68 0.98 0.64 0.65 0.88 -2.73 2.66 5.39
## ST097Q01TA 6 6826 3.01 0.74 3.00 3.07 0.00 1.00 4.00 3.00
## ST097Q02TA 7 6807 2.92 0.82 3.00 2.99 0.00 1.00 4.00 3.00
## ST097Q03TA 8 6779 2.94 0.89 3.00 3.05 1.48 1.00 4.00 3.00
## ST097Q04TA 9 6809 2.70 0.86 3.00 2.75 1.48 1.00 4.00 3.00
## ST097Q05TA 10 6821 2.69 0.91 3.00 2.74 1.48 1.00 4.00 3.00
## ODOKUMA1 11 6890 464.23 87.78 463.40 463.90 91.11 175.61 771.51 595.90
## ODOKUMA2 12 6890 464.42 87.70 465.92 464.57 90.33 166.62 729.88 563.26
## ODOKUMA3 13 6890 464.71 87.08 464.62 464.81 91.06 171.84 748.15 576.32
## ODOKUMA4 14 6890 464.61 87.40 464.89 464.48 90.43 184.83 739.18 554.36
## ODOKUMA5 15 6890 464.20 87.21 464.83 464.36 91.02 168.89 747.02 578.13
## skew kurtosis se
## SINIF -0.81 2.14 0.01
## CINSIYET -0.03 -2.00 0.01
## Anne_Egitim 0.45 -1.13 0.02
## Baba_Egitim 0.26 -1.36 0.02
## OKUMA_ZEVK 0.12 0.22 0.01
## ST097Q01TA -0.73 0.77 0.01
## ST097Q02TA -0.65 0.11 0.01
## ST097Q03TA -0.66 -0.20 0.01
## ST097Q04TA -0.41 -0.41 0.01
## ST097Q05TA -0.35 -0.63 0.01
## ODOKUMA1 0.04 -0.30 1.06
## ODOKUMA2 -0.02 -0.33 1.06
## ODOKUMA3 0.00 -0.31 1.05
## ODOKUMA4 0.01 -0.34 1.05
## ODOKUMA5 -0.01 -0.34 1.05
## # A tibble: 6 × 8
## SUBNO TIMEDRS ATTDRUG ATTHOUSE INCOME EMPLMNT MSTATUS RACE
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl+lbl> <dbl> <dbl>
## 1 1 1 8 27 5 1 [HOUSEWFE] 2 1
## 2 2 3 7 20 6 0 [PAIDWORK] 2 1
## 3 3 0 8 23 3 0 [PAIDWORK] 2 1
## 4 4 13 9 28 8 1 [HOUSEWFE] 2 1
## 5 5 15 7 24 1 1 [HOUSEWFE] 2 1
## 6 6 3 8 25 4 0 [PAIDWORK] 2 1
## SUBNO TIMEDRS ATTDRUG ATTHOUSE
## Min. : 1.0 Min. : 0.000 Min. : 5.000 Min. : 2.00
## 1st Qu.:137.0 1st Qu.: 2.000 1st Qu.: 7.000 1st Qu.:21.00
## Median :314.0 Median : 4.000 Median : 8.000 Median :24.00
## Mean :317.4 Mean : 7.901 Mean : 7.686 Mean :23.54
## 3rd Qu.:483.0 3rd Qu.:10.000 3rd Qu.: 9.000 3rd Qu.:27.00
## Max. :758.0 Max. :81.000 Max. :10.000 Max. :35.00
## NA's :1
## INCOME EMPLMNT MSTATUS RACE
## Min. : 1.00 Min. :0.000 Min. :1.000 Min. :1.000
## 1st Qu.: 2.50 1st Qu.:0.000 1st Qu.:2.000 1st Qu.:1.000
## Median : 4.00 Median :0.000 Median :2.000 Median :1.000
## Mean : 4.21 Mean :0.471 Mean :1.778 Mean :1.088
## 3rd Qu.: 6.00 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :10.00 Max. :1.000 Max. :2.000 Max. :2.000
## NA's :26
## tibble [465 × 8] (S3: tbl_df/tbl/data.frame)
## $ SUBNO : num [1:465] 1 2 3 4 5 6 7 8 9 10 ...
## ..- attr(*, "label")= chr "Subject number"
## ..- attr(*, "format.spss")= chr "F3.0"
## $ TIMEDRS : num [1:465] 1 3 0 13 15 3 2 0 7 4 ...
## ..- attr(*, "label")= chr "Visits to health professionals"
## ..- attr(*, "format.spss")= chr "F2.0"
## $ ATTDRUG : num [1:465] 8 7 8 9 7 8 7 7 7 8 ...
## ..- attr(*, "label")= chr "Attitudes toward medication"
## ..- attr(*, "format.spss")= chr "F2.0"
## $ ATTHOUSE: num [1:465] 27 20 23 28 24 25 30 24 20 30 ...
## ..- attr(*, "label")= chr "Attitudes toward housework"
## ..- attr(*, "format.spss")= chr "F2.0"
## $ INCOME : num [1:465] 5 6 3 8 1 4 6 6 2 8 ...
## ..- attr(*, "format.spss")= chr "F2.0"
## $ EMPLMNT : dbl+lbl [1:465] 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, ...
## ..@ label : chr "Whether currently employed"
## ..@ format.spss: chr "F1.0"
## ..@ labels : Named num [1:2] 0 1
## .. ..- attr(*, "names")= chr [1:2] "PAIDWORK" "HOUSEWFE"
## $ MSTATUS : num [1:465] 2 2 2 2 2 2 2 2 2 1 ...
## ..- attr(*, "label")= chr "Whether currently married"
## ..- attr(*, "format.spss")= chr "F1.0"
## $ RACE : num [1:465] 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "format.spss")= chr "F1.0"
## vars n mean sd median trimmed mad min max range skew
## SUBNO 1 465 317.38 194.16 314 313.26 256.49 1 758 757 0.14
## TIMEDRS 2 465 7.90 10.95 4 5.61 4.45 0 81 81 3.23
## ATTDRUG 3 465 7.69 1.16 8 7.71 1.48 5 10 5 -0.12
## ATTHOUSE 4 464 23.54 4.48 24 23.62 4.45 2 35 33 -0.45
## INCOME 5 439 4.21 2.42 4 4.01 2.97 1 10 9 0.58
## EMPLMNT 6 465 0.47 0.50 0 0.46 0.00 0 1 1 0.12
## MSTATUS 7 465 1.78 0.42 2 1.85 0.00 1 2 1 -1.34
## RACE 8 465 1.09 0.28 1 1.00 0.00 1 2 1 2.90
## kurtosis se
## SUBNO -0.99 9.00
## TIMEDRS 12.88 0.51
## ATTDRUG -0.47 0.05
## ATTHOUSE 1.51 0.21
## INCOME -0.38 0.12
## EMPLMNT -1.99 0.02
## MSTATUS -0.21 0.02
## RACE 6.40 0.01
## Warning: package 'gtsummary' was built under R version 4.5.2
# Summarise columns 2-6 of `screen`: continuous variables are reported as
# "min,max", and the count of missing values is always displayed.
tbl_summary(
  select(screen, 2:6),
  statistic = all_continuous() ~ "{min},{max}",
  missing = "always"
)
## ! Column(s) "EMPLMNT" are class "haven_labelled".
## ℹ This is an intermediate data structure not meant for analysis.
## ℹ Convert columns with `haven::as_factor()`, `labelled::to_factor()`,
## `labelled::unlabelled()`, and `unclass()`. Failure to convert may have
## unintended consequences or result in error.
## <https://haven.tidyverse.org/articles/semantics.html>
## <https://larmarange.github.io/labelled/articles/intro_labelled.html#unlabelled>
| Characteristic | N = 465¹ |
|---|---|
| Visits to health professionals | 0,81 |
| Unknown | 0 |
| Attitudes toward medication | |
| 5 | 13 (2.8%) |
| 6 | 60 (13%) |
| 7 | 126 (27%) |
| 8 | 149 (32%) |
| 9 | 95 (20%) |
| 10 | 22 (4.7%) |
| Unknown | 0 |
| Attitudes toward housework | 2.0,35.0 |
| Unknown | 1 |
| INCOME | 1.00,10.00 |
| Unknown | 26 |
| Whether currently employed | |
| 0 | 246 (53%) |
| 1 | 219 (47%) |
| Unknown | 0 |
| ¹ Min,Max; n (%) | |
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
| Variable | NotNA | Min | Max |
|---|---|---|---|
| SUBNO | 465 | 1 | 758 |
| TIMEDRS | 465 | 0 | 81 |
| ATTDRUG | 465 | 5 | 10 |
| ATTHOUSE | 464 | 2 | 35 |
| INCOME | 439 | 1 | 10 |
| MSTATUS | 465 | 1 | 2 |
| RACE | 465 | 1 | 2 |
| Variable | Frekans | Minimum | Maximum |
|---|---|---|---|
| SUBNO | 465 | 1 | 758 |
| TIMEDRS | 465 | 0 | 81 |
| ATTDRUG | 465 | 5 | 10 |
| ATTHOUSE | 464 | 2 | 35 |
| INCOME | 439 | 1 | 10 |
| MSTATUS | 465 | 1 | 2 |
| RACE | 465 | 1 | 2 |
| | vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| TIMEDRS | 1 | 465 | 7.901 | 10.948 | 4 | 5.606 | 4.448 | 0 | 81 | 81 | 3.227 | 12.879 | 0.508 |
| ATTDRUG | 2 | 465 | 7.686 | 1.156 | 8 | 7.708 | 1.483 | 5 | 10 | 5 | -0.122 | -0.466 | 0.054 |
| ATTHOUSE | 3 | 464 | 23.541 | 4.484 | 24 | 23.624 | 4.448 | 2 | 35 | 33 | -0.454 | 1.507 | 0.208 |
| INCOME | 4 | 439 | 4.210 | 2.419 | 4 | 4.014 | 2.965 | 1 | 10 | 9 | 0.578 | -0.381 | 0.115 |
| EMPLMNT | 5 | 465 | 0.471 | 0.500 | 0 | 0.464 | 0.000 | 0 | 1 | 1 | 0.116 | -1.991 | 0.023 |
| MSTATUS | 6 | 465 | 1.778 | 0.416 | 2 | 1.847 | 0.000 | 1 | 2 | 1 | -1.337 | -0.213 | 0.019 |
| RACE | 7 | 465 | 1.088 | 0.284 | 1 | 1.000 | 0.000 | 1 | 2 | 1 | 2.895 | 6.398 | 0.013 |
## Warning: package 'skimr' was built under R version 4.5.2
| Name | screen |
| Number of rows | 465 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| numeric | 8 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| SUBNO | 0 | 1.00 | 317.38 | 194.16 | 1 | 137.0 | 314 | 483 | 758 | ▇▆▆▇▁ |
| TIMEDRS | 0 | 1.00 | 7.90 | 10.95 | 0 | 2.0 | 4 | 10 | 81 | ▇▁▁▁▁ |
| ATTDRUG | 0 | 1.00 | 7.69 | 1.16 | 5 | 7.0 | 8 | 9 | 10 | ▃▇▇▅▁ |
| ATTHOUSE | 1 | 1.00 | 23.54 | 4.48 | 2 | 21.0 | 24 | 27 | 35 | ▁▁▅▇▂ |
| INCOME | 26 | 0.94 | 4.21 | 2.42 | 1 | 2.5 | 4 | 6 | 10 | ▆▇▅▃▂ |
| EMPLMNT | 0 | 1.00 | 0.47 | 0.50 | 0 | 0.0 | 0 | 1 | 1 | ▇▁▁▁▇ |
| MSTATUS | 0 | 1.00 | 1.78 | 0.42 | 1 | 2.0 | 2 | 2 | 2 | ▂▁▁▁▇ |
| RACE | 0 | 1.00 | 1.09 | 0.28 | 1 | 1.0 | 1 | 1 | 2 | ▇▁▁▁▁ |
| Name | midiPISA |
| Number of rows | 6890 |
| Number of columns | 16 |
| _______________________ | |
| Column type frequency: | |
| numeric | 16 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| OGRENCIID | 0 | 1.00 | 79203623.28 | 2086.54 | 79200001.00 | 79201814.25 | 79203625.50 | 79205426.75 | 79207242.00 | ▇▇▇▇▇ |
| SINIF | 0 | 1.00 | 9.84 | 0.46 | 7.00 | 10.00 | 10.00 | 10.00 | 12.00 | ▁▂▇▁▁ |
| CINSIYET | 0 | 1.00 | 1.51 | 0.50 | 1.00 | 1.00 | 2.00 | 2.00 | 2.00 | ▇▁▁▁▇ |
| Anne_Egitim | 55 | 0.99 | 2.66 | 1.95 | 0.00 | 1.00 | 2.00 | 4.00 | 6.00 | ▇▅▂▂▅ |
| Baba_Egitim | 57 | 0.99 | 3.12 | 1.94 | 0.00 | 1.00 | 2.00 | 5.00 | 6.00 | ▆▆▂▂▇ |
| OKUMA_ZEVK | 69 | 0.99 | 0.68 | 0.98 | -2.73 | 0.01 | 0.64 | 1.23 | 2.66 | ▁▁▇▇▃ |
| ST097Q01TA | 64 | 0.99 | 3.01 | 0.74 | 1.00 | 3.00 | 3.00 | 3.00 | 4.00 | ▁▂▁▇▃ |
| ST097Q02TA | 83 | 0.99 | 2.92 | 0.82 | 1.00 | 3.00 | 3.00 | 3.00 | 4.00 | ▁▂▁▇▃ |
| ST097Q03TA | 111 | 0.98 | 2.94 | 0.89 | 1.00 | 3.00 | 3.00 | 4.00 | 4.00 | ▂▂▁▇▅ |
| ST097Q04TA | 81 | 0.99 | 2.70 | 0.86 | 1.00 | 2.00 | 3.00 | 3.00 | 4.00 | ▂▃▁▇▂ |
| ST097Q05TA | 69 | 0.99 | 2.69 | 0.91 | 1.00 | 2.00 | 3.00 | 3.00 | 4.00 | ▂▅▁▇▃ |
| ODOKUMA1 | 0 | 1.00 | 464.23 | 87.78 | 175.61 | 402.56 | 463.40 | 525.72 | 771.51 | ▁▅▇▃▁ |
| ODOKUMA2 | 0 | 1.00 | 464.42 | 87.70 | 166.62 | 403.45 | 465.92 | 525.32 | 729.88 | ▁▃▇▅▁ |
| ODOKUMA3 | 0 | 1.00 | 464.71 | 87.08 | 171.84 | 403.36 | 464.62 | 526.30 | 748.15 | ▁▃▇▅▁ |
| ODOKUMA4 | 0 | 1.00 | 464.61 | 87.40 | 184.83 | 402.52 | 464.89 | 524.91 | 739.18 | ▁▅▇▅▁ |
| ODOKUMA5 | 0 | 1.00 | 464.20 | 87.21 | 168.89 | 403.08 | 464.83 | 525.74 | 747.02 | ▁▃▇▅▁ |
##
##
## processing file: report.rmd
## | | | 0% | |. | 2% | |.. | 5% [global_options] | |... | 7% | |.... | 10% [introduce] | |.... | 12% | |..... | 14% [plot_intro]
## | |...... | 17% | |....... | 19% [data_structure] | |........ | 21% | |......... | 24% [missing_profile]
## | |.......... | 26% | |........... | 29% [univariate_distribution_header] | |........... | 31% | |............ | 33% [plot_histogram]
## | |............. | 36% | |.............. | 38% [plot_density] | |............... | 40% | |................ | 43% [plot_frequency_bar] | |................. | 45% | |.................. | 48% [plot_response_bar] | |.................. | 50% | |................... | 52% [plot_with_bar] | |.................... | 55% | |..................... | 57% [plot_normal_qq]
## | |...................... | 60% | |....................... | 62% [plot_response_qq] | |........................ | 64% | |......................... | 67% [plot_by_qq] | |.......................... | 69% | |.......................... | 71% [correlation_analysis]
## | |........................... | 74% | |............................ | 76% [principal_component_analysis]
## | |............................. | 79% | |.............................. | 81% [bivariate_distribution_header] | |............................... | 83% | |................................ | 86% [plot_response_boxplot] | |................................. | 88% | |................................. | 90% [plot_by_boxplot] | |.................................. | 93% | |................................... | 95% [plot_response_scatterplot] | |.................................... | 98% | |.....................................| 100% [plot_by_scatterplot]
## output file: /Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 2.Yarıyıl/OLC731_R ile İleri İstatistik Uygulamaları_Doç. Dr. Kübra Atalay Kabasakal /1.Hafta 16.02.2026/report.knit.md
## '/Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 1.Yarıyıl/OLC733_R Yazılımı ile Veri Analizi_Doç. Dr. Kübra Atalay Kabasakal/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/pandoc' +RTS -K512m -RTS '/Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 2.Yarıyıl/OLC731_R ile İleri İstatistik Uygulamaları_Doç. Dr. Kübra Atalay Kabasakal /1.Hafta 16.02.2026/report.knit.md' --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandoc158044e5a80f.html --lua-filter /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmarkdown/lua/latex-div.lua --lua-filter /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmarkdown/lua/table-classes.lua --embed-resources --standalone --variable bs3=TRUE --section-divs --table-of-contents --toc-depth 6 --template /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=yeti --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /var/folders/7w/tm3yfklj31q6cm334dm97bj80000gn/T//RtmpyUcXLK/rmarkdown-str15805520712a.html
##
## Output created: report.html
##
##
## processing file: report.rmd
## | | | 0% | |. | 2% | |.. | 5% [global_options] | |... | 7% | |.... | 10% [introduce] | |.... | 12% | |..... | 14% [plot_intro]
## | |...... | 17% | |....... | 19% [data_structure] | |........ | 21% | |......... | 24% [missing_profile]
## | |.......... | 26% | |........... | 29% [univariate_distribution_header] | |........... | 31% | |............ | 33% [plot_histogram]
## | |............. | 36% | |.............. | 38% [plot_density] | |............... | 40% | |................ | 43% [plot_frequency_bar] | |................. | 45% | |.................. | 48% [plot_response_bar] | |.................. | 50% | |................... | 52% [plot_with_bar] | |.................... | 55% | |..................... | 57% [plot_normal_qq]
## | |...................... | 60% | |....................... | 62% [plot_response_qq] | |........................ | 64% | |......................... | 67% [plot_by_qq] | |.......................... | 69% | |.......................... | 71% [correlation_analysis]
## | |........................... | 74% | |............................ | 76% [principal_component_analysis]
## | |............................. | 79% | |.............................. | 81% [bivariate_distribution_header] | |............................... | 83% | |................................ | 86% [plot_response_boxplot] | |................................. | 88% | |................................. | 90% [plot_by_boxplot] | |.................................. | 93% | |................................... | 95% [plot_response_scatterplot] | |.................................... | 98% | |.....................................| 100% [plot_by_scatterplot]
## output file: /Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 2.Yarıyıl/OLC731_R ile İleri İstatistik Uygulamaları_Doç. Dr. Kübra Atalay Kabasakal /1.Hafta 16.02.2026/report.knit.md
## '/Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 1.Yarıyıl/OLC733_R Yazılımı ile Veri Analizi_Doç. Dr. Kübra Atalay Kabasakal/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/pandoc' +RTS -K512m -RTS '/Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 2.Yarıyıl/OLC731_R ile İleri İstatistik Uygulamaları_Doç. Dr. Kübra Atalay Kabasakal /1.Hafta 16.02.2026/report.knit.md' --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandoc15803017ae15.html --lua-filter /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmarkdown/lua/latex-div.lua --lua-filter /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmarkdown/lua/table-classes.lua --embed-resources --standalone --variable bs3=TRUE --section-divs --table-of-contents --toc-depth 6 --template /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=yeti --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /var/folders/7w/tm3yfklj31q6cm334dm97bj80000gn/T//RtmpyUcXLK/rmarkdown-str158015a7a076.html
##
## Output created: report.html
## # A tibble: 10 × 8
## SUBNO TIMEDRS ATTDRUG ATTHOUSE INCOME EMPLMNT MSTATUS RACE
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 8 27 5 1 2 1
## 2 2 3 7 20 6 0 2 1
## 3 3 0 8 23 3 0 2 1
## 4 4 13 9 28 8 1 2 1
## 5 5 15 7 24 1 1 2 1
## 6 6 3 8 25 4 0 2 1
## 7 7 2 7 30 6 1 2 1
## 8 8 0 7 24 6 1 2 1
## 9 9 7 7 20 2 1 2 1
## 10 10 4 8 30 8 0 1 1
##
## Attaching package: 'naniar'
## The following object is masked from 'package:skimr':
##
## n_complete
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## [1] TRUE
## [1] 27
## [1] 0.007258065
## SUBNO TIMEDRS ATTDRUG ATTHOUSE INCOME EMPLMNT MSTATUS RACE
## 0 0 0 1 26 0 0 0
## # A tibble: 8 × 3
## variable n_miss pct_miss
## <chr> <int> <num>
## 1 INCOME 26 5.59
## 2 ATTHOUSE 1 0.215
## 3 SUBNO 0 0
## 4 TIMEDRS 0 0
## 5 ATTDRUG 0 0
## 6 EMPLMNT 0 0
## 7 MSTATUS 0 0
## 8 RACE 0 0
## # A tibble: 3 × 3
## n_miss_in_var n_vars pct_vars
## <int> <int> <dbl>
## 1 0 6 75
## 2 1 1 12.5
## 3 26 1 12.5
## # A tibble: 2 × 3
## n_miss_in_case n_cases pct_cases
## <int> <int> <dbl>
## 1 0 438 94.2
## 2 1 27 5.81
## # A tibble: 465 × 3
## case n_miss pct_miss
## <int> <int> <dbl>
## 1 52 1 12.5
## 2 64 1 12.5
## 3 69 1 12.5
## 4 77 1 12.5
## 5 118 1 12.5
## 6 135 1 12.5
## 7 161 1 12.5
## 8 172 1 12.5
## 9 173 1 12.5
## 10 174 1 12.5
## # ℹ 455 more rows
## Warning: package 'rlang' was built under R version 4.5.2
##
## Attaching package: 'rlang'
## The following object is masked from 'package:data.table':
##
## :=
# If the naniar package throws an error, load these two packages first
# (the second one is presumably rlang, attached just before this -- confirm).
library(UpSetR)
# Lets us see whether different variables have missing values in common.
gg_miss_upset(screen)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the UpSetR package.
## Please report the issue to the authors.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the UpSetR package.
## Please report the issue to the authors.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
MCAR TESTİ
## # A tibble: 1 × 4
## statistic df p.value missing.patterns
## <dbl> <dbl> <dbl> <int>
## 1 19.6 12 0.0752 3
# Compare cases with observed vs. missing INCOME on the remaining variables
# using finalfit::missing_compare(), and print the result as a kable table.
screen_MAR <- screen
# INCOME_m is a copy of INCOME used as the "dependent" (missingness) variable.
screen_MAR$INCOME_m <- screen_MAR$INCOME
library(finalfit)
explanatory <- c("TIMEDRS", "ATTDRUG", "ATTHOUSE")
dependent <- "INCOME_m"
screen_MAR %>%
  missing_compare(dependent, explanatory) %>%
  knitr::kable(row.names = FALSE, align = c("l", "l", "r", "r", "r"))
| Missing data analysis: INCOME_m | | Not missing | Missing | p |
|---|---|---|---|---|
| TIMEDRS | Mean (SD) | 7.9 (11.1) | 7.6 (7.4) | 0.891 |
| ATTDRUG | Mean (SD) | 7.7 (1.2) | 7.9 (1.0) | 0.368 |
| ATTHOUSE | Mean (SD) | 23.5 (4.5) | 23.7 (4.2) | 0.860 |
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.2.0 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ rlang:::=() masks data.table:::=()
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ data.table::between() masks dplyr::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ data.table::first() masks dplyr::first()
## ✖ purrr::flatten() masks rlang::flatten()
## ✖ purrr::flatten_chr() masks rlang::flatten_chr()
## ✖ purrr::flatten_dbl() masks rlang::flatten_dbl()
## ✖ purrr::flatten_int() masks rlang::flatten_int()
## ✖ purrr::flatten_lgl() masks rlang::flatten_lgl()
## ✖ purrr::flatten_raw() masks rlang::flatten_raw()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ purrr::invoke() masks rlang::invoke()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ data.table::last() masks dplyr::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ lubridate::second() masks data.table::second()
## ✖ purrr::splice() masks rlang::splice()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
VERİ SİLME
Liste bazında silme (listwise deletion)
## # A tibble: 438 × 8
## SUBNO TIMEDRS ATTDRUG ATTHOUSE INCOME EMPLMNT MSTATUS RACE
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 8 27 5 1 2 1
## 2 2 3 7 20 6 0 2 1
## 3 3 0 8 23 3 0 2 1
## 4 4 13 9 28 8 1 2 1
## 5 5 15 7 24 1 1 2 1
## 6 6 3 8 25 4 0 2 1
## 7 7 2 7 30 6 1 2 1
## 8 8 0 7 24 6 1 2 1
## 9 9 7 7 20 2 1 2 1
## 10 10 4 8 30 8 0 1 1
## # ℹ 428 more rows
VERİ ATAMA
Ortalama atama
# Mean imputation: copy `screen`, fill the missing INCOME values with the mean
# of the observed ones, then print the standard deviation of the imputed column.
screen3 <- screen
screen3$INCOME <- replace(
  screen3$INCOME,
  is.na(screen3$INCOME),
  mean(screen3$INCOME, na.rm = TRUE)
)
sd(screen3$INCOME)
## [1] 2.350128
## [1] 2.418875
Döngü ile ortalama atama
# Mean imputation with a loop: take columns 2-5 of `screen` and replace the
# missing values of each column with that column's observed mean.
screen4 <- screen[, 2:5]
# `[[i]]` extracts the column as a plain vector for both data frames and
# tibbles; `[, i]` on a tibble returns a one-column tibble, for which mean()
# returns NA (with a warning), so nothing would be imputed.
# seq_along() also stays safe if there are zero columns.
for (i in seq_along(screen4)) {
  screen4[[i]][is.na(screen4[[i]])] <- mean(screen4[[i]], na.rm = TRUE)
}
any_na(screen4)
## [1] FALSE
Apply ailesi ile ortalama atama
# Mean imputation with the apply family: every column of `screen4` has its NAs
# replaced by that column's mean, and the list returned by lapply() is rebuilt
# into a data.frame.
screen4 <- data.frame(lapply(screen4, function(column) {
  replace(column, is.na(column), mean(column, na.rm = TRUE))
}))
any_na(screen4)
## [1] FALSE
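NOT: Medyan atamak, özellikle çarpık dağılımlarda ve uç değerlerin bulunduğu durumlarda ortalama atamaktan daha iyidir; çünkü medyan uç değerlerden etkilenmez. Yine de her iki yöntem de tüm eksik değerlere aynı sabit değeri atadığı için değişkenliği azaltır (yukarıda INCOME için standart sapma 2.42'den 2.35'e düşmüştür).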