1.Hafta Ev Çalışması

VERİ TEMİZLEME

getwd()

## [1] "/Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 2.Yarıyıl/OLC731_R ile İleri İstatistik Uygulamaları_Doç. Dr. Kübra Atalay Kabasakal /1.Hafta 16.02.2026"

load("midiPISA.rda")

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.5.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(psych)

library(haven)

describe(midiPISA [,-1])

##             vars    n   mean    sd median trimmed   mad    min    max  range
## SINIF          1 6890   9.84  0.46  10.00    9.89  0.00   7.00  12.00   5.00
## CINSIYET       2 6890   1.51  0.50   2.00    1.51  0.00   1.00   2.00   1.00
## Anne_Egitim    3 6835   2.66  1.95   2.00    2.57  1.48   0.00   6.00   6.00
## Baba_Egitim    4 6833   3.12  1.94   2.00    3.08  1.48   0.00   6.00   6.00
## OKUMA_ZEVK     5 6821   0.68  0.98   0.64    0.65  0.88  -2.73   2.66   5.39
## ST097Q01TA     6 6826   3.01  0.74   3.00    3.07  0.00   1.00   4.00   3.00
## ST097Q02TA     7 6807   2.92  0.82   3.00    2.99  0.00   1.00   4.00   3.00
## ST097Q03TA     8 6779   2.94  0.89   3.00    3.05  1.48   1.00   4.00   3.00
## ST097Q04TA     9 6809   2.70  0.86   3.00    2.75  1.48   1.00   4.00   3.00
## ST097Q05TA    10 6821   2.69  0.91   3.00    2.74  1.48   1.00   4.00   3.00
## ODOKUMA1      11 6890 464.23 87.78 463.40  463.90 91.11 175.61 771.51 595.90
## ODOKUMA2      12 6890 464.42 87.70 465.92  464.57 90.33 166.62 729.88 563.26
## ODOKUMA3      13 6890 464.71 87.08 464.62  464.81 91.06 171.84 748.15 576.32
## ODOKUMA4      14 6890 464.61 87.40 464.89  464.48 90.43 184.83 739.18 554.36
## ODOKUMA5      15 6890 464.20 87.21 464.83  464.36 91.02 168.89 747.02 578.13
##              skew kurtosis   se
## SINIF       -0.81     2.14 0.01
## CINSIYET    -0.03    -2.00 0.01
## Anne_Egitim  0.45    -1.13 0.02
## Baba_Egitim  0.26    -1.36 0.02
## OKUMA_ZEVK   0.12     0.22 0.01
## ST097Q01TA  -0.73     0.77 0.01
## ST097Q02TA  -0.65     0.11 0.01
## ST097Q03TA  -0.66    -0.20 0.01
## ST097Q04TA  -0.41    -0.41 0.01
## ST097Q05TA  -0.35    -0.63 0.01
## ODOKUMA1     0.04    -0.30 1.06
## ODOKUMA2    -0.02    -0.33 1.06
## ODOKUMA3     0.00    -0.31 1.05
## ODOKUMA4     0.01    -0.34 1.05
## ODOKUMA5    -0.01    -0.34 1.05

screen <- read_sav("SCREEN.SAV")

head(screen)

## # A tibble: 6 × 8
##   SUBNO TIMEDRS ATTDRUG ATTHOUSE INCOME EMPLMNT      MSTATUS  RACE
##   <dbl>   <dbl>   <dbl>    <dbl>  <dbl> <dbl+lbl>      <dbl> <dbl>
## 1     1       1       8       27      5 1 [HOUSEWFE]       2     1
## 2     2       3       7       20      6 0 [PAIDWORK]       2     1
## 3     3       0       8       23      3 0 [PAIDWORK]       2     1
## 4     4      13       9       28      8 1 [HOUSEWFE]       2     1
## 5     5      15       7       24      1 1 [HOUSEWFE]       2     1
## 6     6       3       8       25      4 0 [PAIDWORK]       2     1

summary(screen)

##      SUBNO          TIMEDRS          ATTDRUG          ATTHOUSE    
##  Min.   :  1.0   Min.   : 0.000   Min.   : 5.000   Min.   : 2.00  
##  1st Qu.:137.0   1st Qu.: 2.000   1st Qu.: 7.000   1st Qu.:21.00  
##  Median :314.0   Median : 4.000   Median : 8.000   Median :24.00  
##  Mean   :317.4   Mean   : 7.901   Mean   : 7.686   Mean   :23.54  
##  3rd Qu.:483.0   3rd Qu.:10.000   3rd Qu.: 9.000   3rd Qu.:27.00  
##  Max.   :758.0   Max.   :81.000   Max.   :10.000   Max.   :35.00  
##                                                    NA's   :1      
##      INCOME         EMPLMNT         MSTATUS           RACE      
##  Min.   : 1.00   Min.   :0.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 2.50   1st Qu.:0.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median : 4.00   Median :0.000   Median :2.000   Median :1.000  
##  Mean   : 4.21   Mean   :0.471   Mean   :1.778   Mean   :1.088  
##  3rd Qu.: 6.00   3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:1.000  
##  Max.   :10.00   Max.   :1.000   Max.   :2.000   Max.   :2.000  
##  NA's   :26

str(screen)

## tibble [465 × 8] (S3: tbl_df/tbl/data.frame)
##  $ SUBNO   : num [1:465] 1 2 3 4 5 6 7 8 9 10 ...
##   ..- attr(*, "label")= chr "Subject number"
##   ..- attr(*, "format.spss")= chr "F3.0"
##  $ TIMEDRS : num [1:465] 1 3 0 13 15 3 2 0 7 4 ...
##   ..- attr(*, "label")= chr "Visits to health professionals"
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ ATTDRUG : num [1:465] 8 7 8 9 7 8 7 7 7 8 ...
##   ..- attr(*, "label")= chr "Attitudes toward medication"
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ ATTHOUSE: num [1:465] 27 20 23 28 24 25 30 24 20 30 ...
##   ..- attr(*, "label")= chr "Attitudes toward housework"
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ INCOME  : num [1:465] 5 6 3 8 1 4 6 6 2 8 ...
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ EMPLMNT : dbl+lbl [1:465] 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, ...
##    ..@ label      : chr "Whether currently employed"
##    ..@ format.spss: chr "F1.0"
##    ..@ labels     : Named num [1:2] 0 1
##    .. ..- attr(*, "names")= chr [1:2] "PAIDWORK" "HOUSEWFE"
##  $ MSTATUS : num [1:465] 2 2 2 2 2 2 2 2 2 1 ...
##   ..- attr(*, "label")= chr "Whether currently married"
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ RACE    : num [1:465] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"

describe(screen)

##          vars   n   mean     sd median trimmed    mad min max range  skew
## SUBNO       1 465 317.38 194.16    314  313.26 256.49   1 758   757  0.14
## TIMEDRS     2 465   7.90  10.95      4    5.61   4.45   0  81    81  3.23
## ATTDRUG     3 465   7.69   1.16      8    7.71   1.48   5  10     5 -0.12
## ATTHOUSE    4 464  23.54   4.48     24   23.62   4.45   2  35    33 -0.45
## INCOME      5 439   4.21   2.42      4    4.01   2.97   1  10     9  0.58
## EMPLMNT     6 465   0.47   0.50      0    0.46   0.00   0   1     1  0.12
## MSTATUS     7 465   1.78   0.42      2    1.85   0.00   1   2     1 -1.34
## RACE        8 465   1.09   0.28      1    1.00   0.00   1   2     1  2.90
##          kurtosis   se
## SUBNO       -0.99 9.00
## TIMEDRS     12.88 0.51
## ATTDRUG     -0.47 0.05
## ATTHOUSE     1.51 0.21
## INCOME      -0.38 0.12
## EMPLMNT     -1.99 0.02
## MSTATUS     -0.21 0.02
## RACE         6.40 0.01

library(gtsummary)

## Warning: package 'gtsummary' was built under R version 4.5.2

screen %>%
  select(2:6) %>%
  tbl_summary(
      statistic = all_continuous() ~ c("{min},{max}"),
      missing = "always"
  )

## ! Column(s) "EMPLMNT" are class "haven_labelled".
## ℹ This is an intermediate data structure not meant for analysis.
## ℹ Convert columns with `haven::as_factor()`, `labelled::to_factor()`,
##   `labelled::unlabelled()`, and `unclass()`. Failure to convert may have
##   unintended consequences or result in error.
## <https://haven.tidyverse.org/articles/semantics.html>
## <https://larmarange.github.io/labelled/articles/intro_labelled.html#unlabelled>

Characteristic	N = 465¹
Visits to health professionals	0,81
Unknown	0
Attitudes toward medication
5	13 (2.8%)
6	60 (13%)
7	126 (27%)
8	149 (32%)
9	95 (20%)
10	22 (4.7%)
Unknown	0
Attitudes toward housework	2.0,35.0
Unknown	1
INCOME	1.00,10.00
Unknown	26
Whether currently employed
0	246 (53%)
1	219 (47%)
Unknown	0
¹ Min,Max; n (%)

library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(vtable)

sumtable(screen, summ = c('notNA(x)', 'min(x)', 'max(x)'))

Summary Statistics
Variable	NotNA	Min	Max
SUBNO	465	1	758
TIMEDRS	465	0	81
ATTDRUG	465	5	10
ATTHOUSE	464	2	35
INCOME	439	1	10
MSTATUS	465	1	2
RACE	465	1	2

st(screen, summ= c('notNA(x)', 'min(x)', 'max(x)'),
summ.names= c('Frekans', 'Minimum', 'Maximum'))

Summary Statistics
Variable	Frekans	Minimum	Maximum
SUBNO	465	1	758
TIMEDRS	465	0	81
ATTDRUG	465	5	10
ATTHOUSE	464	2	35
INCOME	439	1	10
MSTATUS	465	1	2
RACE	465	1	2

kable(describe(screen[,-1]), format = 'markdown', caption= 'Betimsel istatisikler', digits = 3)

Betimsel istatisikler
	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
TIMEDRS	1	465	7.901	10.948	4	5.606	4.448	0	81	81	3.227	12.879	0.508
ATTDRUG	2	465	7.686	1.156	8	7.708	1.483	5	10	5	-0.122	-0.466	0.054
ATTHOUSE	3	464	23.541	4.484	24	23.624	4.448	2	35	33	-0.454	1.507	0.208
INCOME	4	439	4.210	2.419	4	4.014	2.965	1	10	9	0.578	-0.381	0.115
EMPLMNT	5	465	0.471	0.500	0	0.464	0.000	0	1	1	0.116	-1.991	0.023
MSTATUS	6	465	1.778	0.416	2	1.847	0.000	1	2	1	-1.337	-0.213	0.019
RACE	7	465	1.088	0.284	1	1.000	0.000	1	2	1	2.895	6.398	0.013

library(skimr)

## Warning: package 'skimr' was built under R version 4.5.2

skim(screen)

Data summary
Name	screen
Number of rows	465
Number of columns	8
_______________________
Column type frequency:
numeric	8
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
SUBNO	0	1.00	317.38	194.16	1	137.0	314	483	758	▇▆▆▇▁
TIMEDRS	0	1.00	7.90	10.95	0	2.0	4	10	81	▇▁▁▁▁
ATTDRUG	0	1.00	7.69	1.16	5	7.0	8	9	10	▃▇▇▅▁
ATTHOUSE	1	1.00	23.54	4.48	2	21.0	24	27	35	▁▁▅▇▂
INCOME	26	0.94	4.21	2.42	1	2.5	4	6	10	▆▇▅▃▂
EMPLMNT	0	1.00	0.47	0.50	0	0.0	0	1	1	▇▁▁▁▇
MSTATUS	0	1.00	1.78	0.42	1	2.0	2	2	2	▂▁▁▁▇
RACE	0	1.00	1.09	0.28	1	1.0	1	1	2	▇▁▁▁▁

skim(midiPISA)

Data summary
Name	midiPISA
Number of rows	6890
Number of columns	16
_______________________
Column type frequency:
numeric	16
________________________
Group variables	None

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
OGRENCIID	0	1.00	79203623.28	2086.54	79200001.00	79201814.25	79203625.50	79205426.75	79207242.00	▇▇▇▇▇
SINIF	0	1.00	9.84	0.46	7.00	10.00	10.00	10.00	12.00	▁▂▇▁▁
CINSIYET	0	1.00	1.51	0.50	1.00	1.00	2.00	2.00	2.00	▇▁▁▁▇
Anne_Egitim	55	0.99	2.66	1.95	0.00	1.00	2.00	4.00	6.00	▇▅▂▂▅
Baba_Egitim	57	0.99	3.12	1.94	0.00	1.00	2.00	5.00	6.00	▆▆▂▂▇
OKUMA_ZEVK	69	0.99	0.68	0.98	-2.73	0.01	0.64	1.23	2.66	▁▁▇▇▃
ST097Q01TA	64	0.99	3.01	0.74	1.00	3.00	3.00	3.00	4.00	▁▂▁▇▃
ST097Q02TA	83	0.99	2.92	0.82	1.00	3.00	3.00	3.00	4.00	▁▂▁▇▃
ST097Q03TA	111	0.98	2.94	0.89	1.00	3.00	3.00	4.00	4.00	▂▂▁▇▅
ST097Q04TA	81	0.99	2.70	0.86	1.00	2.00	3.00	3.00	4.00	▂▃▁▇▂
ST097Q05TA	69	0.99	2.69	0.91	1.00	2.00	3.00	3.00	4.00	▂▅▁▇▃
ODOKUMA1	0	1.00	464.23	87.78	175.61	402.56	463.40	525.72	771.51	▁▅▇▃▁
ODOKUMA2	0	1.00	464.42	87.70	166.62	403.45	465.92	525.32	729.88	▁▃▇▅▁
ODOKUMA3	0	1.00	464.71	87.08	171.84	403.36	464.62	526.30	748.15	▁▃▇▅▁
ODOKUMA4	0	1.00	464.61	87.40	184.83	402.52	464.89	524.91	739.18	▁▅▇▅▁
ODOKUMA5	0	1.00	464.20	87.21	168.89	403.08	464.83	525.74	747.02	▁▃▇▅▁

#rapor oluşturmak için

library(DataExplorer)

create_report(midiPISA)

## 
## 
## processing file: report.rmd

##   |                                             |                                     |   0%  |                                             |.                                    |   2%                                   |                                             |..                                   |   5% [global_options]                  |                                             |...                                  |   7%                                   |                                             |....                                 |  10% [introduce]                       |                                             |....                                 |  12%                                   |                                             |.....                                |  14% [plot_intro]

##   |                                             |......                               |  17%                                   |                                             |.......                              |  19% [data_structure]                  |                                             |........                             |  21%                                   |                                             |.........                            |  24% [missing_profile]

##   |                                             |..........                           |  26%                                   |                                             |...........                          |  29% [univariate_distribution_header]  |                                             |...........                          |  31%                                   |                                             |............                         |  33% [plot_histogram]

##   |                                             |.............                        |  36%                                   |                                             |..............                       |  38% [plot_density]                    |                                             |...............                      |  40%                                   |                                             |................                     |  43% [plot_frequency_bar]              |                                             |.................                    |  45%                                   |                                             |..................                   |  48% [plot_response_bar]               |                                             |..................                   |  50%                                   |                                             |...................                  |  52% [plot_with_bar]                   |                                             |....................                 |  55%                                   |                                             |.....................                |  57% [plot_normal_qq]

##   |                                             |......................               |  60%                                   |                                             |.......................              |  62% [plot_response_qq]                |                                             |........................             |  64%                                   |                                             |.........................            |  67% [plot_by_qq]                      |                                             |..........................           |  69%                                   |                                             |..........................           |  71% [correlation_analysis]

##   |                                             |...........................          |  74%                                   |                                             |............................         |  76% [principal_component_analysis]

##   |                                             |.............................        |  79%                                   |                                             |..............................       |  81% [bivariate_distribution_header]   |                                             |...............................      |  83%                                   |                                             |................................     |  86% [plot_response_boxplot]           |                                             |.................................    |  88%                                   |                                             |.................................    |  90% [plot_by_boxplot]                 |                                             |..................................   |  93%                                   |                                             |...................................  |  95% [plot_response_scatterplot]       |                                             |.................................... |  98%                                   |                                             |.....................................| 100% [plot_by_scatterplot]

## output file: /Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 2.Yarıyıl/OLC731_R ile İleri İstatistik Uygulamaları_Doç. Dr. Kübra Atalay Kabasakal /1.Hafta 16.02.2026/report.knit.md

## '/Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 1.Yarıyıl/OLC733_R Yazılımı ile Veri Analizi_Doç. Dr. Kübra Atalay Kabasakal/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/pandoc' +RTS -K512m -RTS '/Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 2.Yarıyıl/OLC731_R ile İleri İstatistik Uygulamaları_Doç. Dr. Kübra Atalay Kabasakal /1.Hafta 16.02.2026/report.knit.md' --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandoc158044e5a80f.html --lua-filter /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmarkdown/lua/latex-div.lua --lua-filter /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmarkdown/lua/table-classes.lua --embed-resources --standalone --variable bs3=TRUE --section-divs --table-of-contents --toc-depth 6 --template /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=yeti --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /var/folders/7w/tm3yfklj31q6cm334dm97bj80000gn/T//RtmpyUcXLK/rmarkdown-str15805520712a.html

## 
## Output created: report.html

create_report(screen)

## 
## 
## processing file: report.rmd

##   |                                             |                                     |   0%  |                                             |.                                    |   2%                                   |                                             |..                                   |   5% [global_options]                  |                                             |...                                  |   7%                                   |                                             |....                                 |  10% [introduce]                       |                                             |....                                 |  12%                                   |                                             |.....                                |  14% [plot_intro]

##   |                                             |......                               |  17%                                   |                                             |.......                              |  19% [data_structure]                  |                                             |........                             |  21%                                   |                                             |.........                            |  24% [missing_profile]

##   |                                             |..........                           |  26%                                   |                                             |...........                          |  29% [univariate_distribution_header]  |                                             |...........                          |  31%                                   |                                             |............                         |  33% [plot_histogram]

##   |                                             |.............                        |  36%                                   |                                             |..............                       |  38% [plot_density]                    |                                             |...............                      |  40%                                   |                                             |................                     |  43% [plot_frequency_bar]              |                                             |.................                    |  45%                                   |                                             |..................                   |  48% [plot_response_bar]               |                                             |..................                   |  50%                                   |                                             |...................                  |  52% [plot_with_bar]                   |                                             |....................                 |  55%                                   |                                             |.....................                |  57% [plot_normal_qq]

##   |                                             |......................               |  60%                                   |                                             |.......................              |  62% [plot_response_qq]                |                                             |........................             |  64%                                   |                                             |.........................            |  67% [plot_by_qq]                      |                                             |..........................           |  69%                                   |                                             |..........................           |  71% [correlation_analysis]

##   |                                             |...........................          |  74%                                   |                                             |............................         |  76% [principal_component_analysis]

##   |                                             |.............................        |  79%                                   |                                             |..............................       |  81% [bivariate_distribution_header]   |                                             |...............................      |  83%                                   |                                             |................................     |  86% [plot_response_boxplot]           |                                             |.................................    |  88%                                   |                                             |.................................    |  90% [plot_by_boxplot]                 |                                             |..................................   |  93%                                   |                                             |...................................  |  95% [plot_response_scatterplot]       |                                             |.................................... |  98%                                   |                                             |.....................................| 100% [plot_by_scatterplot]

## output file: /Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 2.Yarıyıl/OLC731_R ile İleri İstatistik Uygulamaları_Doç. Dr. Kübra Atalay Kabasakal /1.Hafta 16.02.2026/report.knit.md

## '/Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 1.Yarıyıl/OLC733_R Yazılımı ile Veri Analizi_Doç. Dr. Kübra Atalay Kabasakal/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/pandoc' +RTS -K512m -RTS '/Users/meltematasoy/Desktop/Doktora Dersleri/Doktora Dersleri 2.Yarıyıl/OLC731_R ile İleri İstatistik Uygulamaları_Doç. Dr. Kübra Atalay Kabasakal /1.Hafta 16.02.2026/report.knit.md' --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandoc15803017ae15.html --lua-filter /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmarkdown/lua/latex-div.lua --lua-filter /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmarkdown/lua/table-classes.lua --embed-resources --standalone --variable bs3=TRUE --section-divs --table-of-contents --toc-depth 6 --template /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable theme=yeti --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /var/folders/7w/tm3yfklj31q6cm334dm97bj80000gn/T//RtmpyUcXLK/rmarkdown-str158015a7a076.html

## 
## Output created: report.html

library(haven)

screen <- expss::drop_var_labs(screen)

head(screen, 10)

## # A tibble: 10 × 8
##    SUBNO TIMEDRS ATTDRUG ATTHOUSE INCOME EMPLMNT MSTATUS  RACE
##    <dbl>   <dbl>   <dbl>    <dbl>  <dbl>   <dbl>   <dbl> <dbl>
##  1     1       1       8       27      5       1       2     1
##  2     2       3       7       20      6       0       2     1
##  3     3       0       8       23      3       0       2     1
##  4     4      13       9       28      8       1       2     1
##  5     5      15       7       24      1       1       2     1
##  6     6       3       8       25      4       0       2     1
##  7     7       2       7       30      6       1       2     1
##  8     8       0       7       24      6       1       2     1
##  9     9       7       7       20      2       1       2     1
## 10    10       4       8       30      8       0       1     1

library(naniar)

## 
## Attaching package: 'naniar'

## The following object is masked from 'package:skimr':
## 
##     n_complete

library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

any_na(screen)

## [1] TRUE

n_miss(screen)

## [1] 27

prop_miss(screen)

## [1] 0.007258065

screen %>%
  is.na(
  ) %>%
  colSums()

##    SUBNO  TIMEDRS  ATTDRUG ATTHOUSE   INCOME  EMPLMNT  MSTATUS     RACE 
##        0        0        0        1       26        0        0        0

miss_var_summary(screen)

## # A tibble: 8 × 3
##   variable n_miss pct_miss
##   <chr>     <int>    <num>
## 1 INCOME       26    5.59 
## 2 ATTHOUSE      1    0.215
## 3 SUBNO         0    0    
## 4 TIMEDRS       0    0    
## 5 ATTDRUG       0    0    
## 6 EMPLMNT       0    0    
## 7 MSTATUS       0    0    
## 8 RACE          0    0

miss_var_table(screen)

## # A tibble: 3 × 3
##   n_miss_in_var n_vars pct_vars
##           <int>  <int>    <dbl>
## 1             0      6     75  
## 2             1      1     12.5
## 3            26      1     12.5

miss_case_table(screen)

## # A tibble: 2 × 3
##   n_miss_in_case n_cases pct_cases
##            <int>   <int>     <dbl>
## 1              0     438     94.2 
## 2              1      27      5.81

miss_case_summary(screen)

## # A tibble: 465 × 3
##     case n_miss pct_miss
##    <int>  <int>    <dbl>
##  1    52      1     12.5
##  2    64      1     12.5
##  3    69      1     12.5
##  4    77      1     12.5
##  5   118      1     12.5
##  6   135      1     12.5
##  7   161      1     12.5
##  8   172      1     12.5
##  9   173      1     12.5
## 10   174      1     12.5
## # ℹ 455 more rows

library(rlang)

## Warning: package 'rlang' was built under R version 4.5.2

## 
## Attaching package: 'rlang'

## The following object is masked from 'package:data.table':
## 
##     :=

library(UpSetR) #nainar paketi hata verirse önce bu iki pkaeti de aktifleştirelim

gg_miss_upset(screen) #bu sayede iki farklı değişkende ortak eksik veri var mı bunu görebiliriz

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the UpSetR package.
##   Please report the issue to the authors.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the UpSetR package.
##   Please report the issue to the authors.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

vis_miss(screen)

#ggplottaki biçimsel düzenlemelerin hepsini nainar pkaeti ile de yapabiliriz

gg_miss_upset(screen)

## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

MCAR TESTİ

#nainar ile devam

mcar_test(data=screen[,c(2,3,4,5,6, 7,8)])

## # A tibble: 1 × 4
##   statistic    df p.value missing.patterns
##       <dbl> <dbl>   <dbl>            <int>
## 1      19.6    12  0.0752                3

screen_MAR <- screen

screen_MAR$INCOME_m <- screen_MAR$INCOME

library(finalfit)

explanatory=c("TIMEDRS", "ATTDRUG", "ATTHOUSE")

dependent = "INCOME_m"

screen_MAR %>% missing_compare(dependent, explanatory) %>%
  knitr::kable(row.names = FALSE, align = c("l","l","r","r","r"))

Missing data analysis: INCOME_m		Not missing	Missing	p
TIMEDRS	Mean (SD)	7.9 (11.1)	7.6 (7.4)	0.891
ATTDRUG	Mean (SD)	7.7 (1.2)	7.9 (1.0)	0.368
ATTHOUSE	Mean (SD)	23.5 (4.5)	23.7 (4.2)	0.860

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.2.0     ✔ tidyr     1.3.1
## ✔ readr     2.1.5     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ rlang:::=()              masks data.table:::=()
## ✖ ggplot2::%+%()           masks psych::%+%()
## ✖ ggplot2::alpha()         masks psych::alpha()
## ✖ data.table::between()    masks dplyr::between()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ data.table::first()      masks dplyr::first()
## ✖ purrr::flatten()         masks rlang::flatten()
## ✖ purrr::flatten_chr()     masks rlang::flatten_chr()
## ✖ purrr::flatten_dbl()     masks rlang::flatten_dbl()
## ✖ purrr::flatten_int()     masks rlang::flatten_int()
## ✖ purrr::flatten_lgl()     masks rlang::flatten_lgl()
## ✖ purrr::flatten_raw()     masks rlang::flatten_raw()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ lubridate::hour()        masks data.table::hour()
## ✖ purrr::invoke()          masks rlang::invoke()
## ✖ lubridate::isoweek()     masks data.table::isoweek()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ data.table::last()       masks dplyr::last()
## ✖ lubridate::mday()        masks data.table::mday()
## ✖ lubridate::minute()      masks data.table::minute()
## ✖ lubridate::month()       masks data.table::month()
## ✖ lubridate::quarter()     masks data.table::quarter()
## ✖ lubridate::second()      masks data.table::second()
## ✖ purrr::splice()          masks rlang::splice()
## ✖ purrr::transpose()       masks data.table::transpose()
## ✖ lubridate::wday()        masks data.table::wday()
## ✖ lubridate::week()        masks data.table::week()
## ✖ lubridate::yday()        masks data.table::yday()
## ✖ lubridate::year()        masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

miss_test <- screen_MAR %>%
  mutate(miss_income = is.na(INCOME))

VERİ SİLME

Liste olarak

na.omit(screen)

## # A tibble: 438 × 8
##    SUBNO TIMEDRS ATTDRUG ATTHOUSE INCOME EMPLMNT MSTATUS  RACE
##    <dbl>   <dbl>   <dbl>    <dbl>  <dbl>   <dbl>   <dbl> <dbl>
##  1     1       1       8       27      5       1       2     1
##  2     2       3       7       20      6       0       2     1
##  3     3       0       8       23      3       0       2     1
##  4     4      13       9       28      8       1       2     1
##  5     5      15       7       24      1       1       2     1
##  6     6       3       8       25      4       0       2     1
##  7     7       2       7       30      6       1       2     1
##  8     8       0       7       24      6       1       2     1
##  9     9       7       7       20      2       1       2     1
## 10    10       4       8       30      8       0       1     1
## # ℹ 428 more rows

VERİ ATAMA

Ortalama atama

screen3 <- screen

screen3$INCOME[is.na(screen3$INCOME)] <- 
  mean(screen3$INCOME, na.rm = TRUE)

sd(screen3$INCOME)

## [1] 2.350128

sd(screen$INCOME, na.rm = TRUE)

## [1] 2.418875

#ortalama atadığımız için standart sapma küçüldü

Döngü ile ortalama atama

screen4 <- screen [,2:5]

for(i in 1:ncol(screen4)) {
  screen4[,i][is.na(screen4[,i])] <-
    mean(screen4[,i], na.rm=TRUE)
}

any_na(screen4)

## [1] FALSE

Apply ailesi ile ortalama atama

screen4 <- data.frame(lapply(screen4, function(x) {
  x[is.na(x)] <- mean(x, na.rm = TRUE)
  x
}))

any_na(screen4)

## [1] FALSE

NOT: MEDYAN ATAMAK ORTALAMA ATAMAKTAN DAHA İYİDİR ÇÜNKÜ DEĞİŞKENLİĞE ETKİSİ DAHA AZDIR.

1.Hafta Ev Çalışması

Meltem Atasoy

2026-03-08