Kullanılan Paketler

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stevemisc)
## 
## Attaching package: 'stevemisc'
## 
## The following object is masked from 'package:lubridate':
## 
##     dst
## 
## The following object is masked from 'package:dplyr':
## 
##     tbl_df
library(knitr)
library(summarytools)
## 
## Attaching package: 'summarytools'
## 
## The following object is masked from 'package:tibble':
## 
##     view
library(outliers)
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggpmisc)
## Zorunlu paket yükleniyor: ggpp
## Registered S3 methods overwritten by 'ggpp':
##   method                  from   
##   heightDetails.titleGrob ggplot2
##   widthDetails.titleGrob  ggplot2
## 
## Attaching package: 'ggpp'
## 
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library(psych)
## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:outliers':
## 
##     outlier
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(sur)
## 
## Attaching package: 'sur'
## 
## The following object is masked from 'package:psych':
## 
##     skew
library(moments)
library(corrplot)
## corrplot 0.95 loaded
library(olsrr)
## 
## Attaching package: 'olsrr'
## 
## The following object is masked from 'package:datasets':
## 
##     rivers
library(dplyr)
library(naniar)

Veri Setinin Yüklenmesi

# Load the PimaIndiansDiabetes2 data set shipped with mlbench and keep a
# working copy in `veri`; the first rows are printed as a quick sanity check.
library(mlbench)
data("PimaIndiansDiabetes2", package = "mlbench")
veri <- PimaIndiansDiabetes2
head(veri, n = 6L)
##   pregnant glucose pressure triceps insulin mass pedigree age diabetes
## 1        6     148       72      35      NA 33.6    0.627  50      pos
## 2        1      85       66      29      NA 26.6    0.351  31      neg
## 3        8     183       64      NA      NA 23.3    0.672  32      pos
## 4        1      89       66      23      94 28.1    0.167  21      neg
## 5        0     137       40      35     168 43.1    2.288  33      pos
## 6        5     116       74      NA      NA 25.6    0.201  30      neg

Eksik Veri Düzenlemesi

# Fill missing insulin values (the variable with the most missing data) with
# the column mean, then drop every row that still has a missing value.
veri <- veri %>%
  mutate(
    insulin = replace(insulin, is.na(insulin), mean(insulin, na.rm = TRUE))
  ) %>%
  na.omit()
any_na(veri) # confirm that no missing values remain
## [1] FALSE

Çok Değişkenli Normallik Sayıltısı

library(sur)
# NOTE(review): attach() places the columns of `veri` on the search path so the
# bare names `age` and `mass` used below resolve; explicit `veri$age` or
# with(veri, ...) would be safer because attach() masks other objects.
attach(veri)   # makes the columns available to the skewness calls that follow
## The following object is masked from package:datasets:
## 
##     pressure
# Skewness of age (the bare name resolves through the earlier attach(veri))
skew(age)
## [1] 1.269711
se.skew(age)   # standard error of the skewness
## [1] 0.1059011
skew.ratio(age)  # skewness divided by its standard error
## [1] 11.9896
skew(age) /se.skew(age)  # same value as skew.ratio(age) above
## [1] 11.9896
library(moments)                 # hypothesis-test check of the skewness result
library(labelled)
# Jarque-Bera normality test on age; remove_labels() is applied before testing
jarque.test(remove_labels(age))
## 
##  Jarque-Bera Normality Test
## 
## data:  remove_labels(age)
## JB = 172.89, p-value < 2.2e-16
## alternative hypothesis: greater
# Same test for mass
jarque.test(remove_labels(mass))
## 
##  Jarque-Bera Normality Test
## 
## data:  remove_labels(mass)
## JB = 70.222, p-value = 5.551e-16
## alternative hypothesis: greater
# Skewness of mass divided by its standard error
skew.ratio(mass)
## [1] 5.947561

Doğrusallık

# Scatter plots of age against three predictors to inspect linearity
for (predictor in c("mass", "insulin", "triceps")) {
  plot(
    veri[[predictor]], veri$age,
    main = paste("age ~", predictor),
    xlab = predictor, ylab = "age",
    pch = 19, col = "blue"
  )
}

# Scatter-plot matrix of the first eight columns
pairs(veri[, 1:8])

# Veri Dönüştürme

# Log-transform age to reduce its positive skew and compare descriptives before
# and after. Columns are taken from `veri` explicitly, not via attach().
age1 <- log(veri$age + 1)
describe(veri$age)
##    vars   n  mean    sd median trimmed mad min max range skew kurtosis   se
## X1    1 532 31.61 10.76     28   29.95 8.9  21  81    60 1.27     1.15 0.47
describe(age1)
##    vars   n mean  sd median trimmed  mad  min  max range skew kurtosis   se
## X1    1 532 3.44 0.3   3.37    3.41 0.32 3.09 4.41  1.32 0.75    -0.44 0.01

Çoklu Bağlantı ve Tekillik

# Pairwise Pearson correlations of the first eight columns, rounded for display.
# `digits` is spelled out in full instead of relying on partial matching.
cor(veri[, 1:8]) %>% kable(digits = 2)
pregnant glucose pressure triceps insulin mass pedigree age
pregnant 1.00 0.13 0.20 0.10 0.07 0.01 0.01 0.64
glucose 0.13 1.00 0.22 0.23 0.50 0.25 0.17 0.28
pressure 0.20 0.22 1.00 0.23 0.09 0.31 0.01 0.35
triceps 0.10 0.23 0.23 1.00 0.16 0.65 0.12 0.16
insulin 0.07 0.50 0.09 0.16 1.00 0.20 0.12 0.18
mass 0.01 0.25 0.31 0.65 0.20 1.00 0.15 0.07
pedigree 0.01 0.17 0.01 0.12 0.12 0.15 1.00 0.07
age 0.64 0.28 0.35 0.16 0.18 0.07 0.07 1.00
library(ggcorrplot)
# Compute the correlation matrix and its p-value matrix once and reuse them.
cor_mat <- cor(veri[, 1:8])
cor_pmat <- ggcorrplot::cor_pmat(x = veri[, 1:8])

# Correlation heat map with the coefficients printed in the cells
ggcorrplot(cor_mat, lab = TRUE, lab_size = 4)

# Same heat map, additionally passing the p-value matrix through `p.mat`
ggcorrplot::ggcorrplot(cor_mat, lab = TRUE, lab_size = 4, p.mat = cor_pmat)

library(PerformanceAnalytics)
## Zorunlu paket yükleniyor: xts
## Zorunlu paket yükleniyor: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following objects are masked from 'package:moments':
## 
##     kurtosis, skewness
## The following object is masked from 'package:graphics':
## 
##     legend
# Correlation chart of the first eight columns (PerformanceAnalytics is loaded above)
chart.Correlation(veri[, 1:8])

# Linear model of age on the other seven numeric columns; it is fitted here to
# obtain tolerance and VIF values for the multicollinearity check.
model <- lm(
  age ~ mass + glucose + insulin + pressure + triceps + pedigree + pregnant,
  data = veri
)
library(olsrr)
# `digits` is spelled out in full instead of relying on partial matching.
ols_vif_tol(model) %>% kable(digits = 2)
Variables Tolerance VIF
mass 0.53 1.88
glucose 0.69 1.44
insulin 0.74 1.34
pressure 0.84 1.19
triceps 0.57 1.76
pedigree 0.95 1.05
pregnant 0.94 1.07
library(car)
## Zorunlu paket yükleniyor: carData
## 
## Attaching package: 'carData'
## The following objects are masked from 'package:sur':
## 
##     Anscombe, States
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
# Variance inflation factor for each predictor in `model`
vif(model)
##     mass  glucose  insulin pressure  triceps pedigree pregnant 
## 1.881770 1.439573 1.343740 1.185145 1.756606 1.047364 1.069395
library(mctest)
# Individual multicollinearity diagnostics (type = "i") for the same model
mctest(model,type= "i")
## 
## Call:
## imcdiag(mod = mod, method = method, corr = FALSE, vif = vif, 
##     tol = tol, conf = conf, cvif = cvif, ind1 = ind1, ind2 = ind2, 
##     leamer = leamer, all = all)
## 
## 
## All Individual Multicollinearity Diagnostics Result
## 
##             VIF    TOL      Wi      Fi Leamer   CVIF Klein   IND1   IND2
## mass     1.8818 0.5314 77.1548 92.7622 0.7290 2.9446     0 0.0061 1.8995
## glucose  1.4396 0.6947 38.4626 46.2431 0.8335 2.2527     0 0.0079 1.2378
## insulin  1.3437 0.7442 30.0773 36.1615 0.8627 2.1027     0 0.0085 1.0370
## pressure 1.1851 0.8438 16.2002 19.4773 0.9186 1.8545     0 0.0096 0.6333
## triceps  1.7566 0.5693 66.2030 79.5949 0.7545 2.7488     0 0.0065 1.7460
## pedigree 1.0474 0.9548  4.1444  4.9827 0.9771 1.6389     0 0.0109 0.1833
## pregnant 1.0694 0.9351  6.0720  7.3003 0.9670 1.6734     0 0.0107 0.2631
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test
## 
## insulin , triceps , pedigree , coefficient(s) are non-significant may be due to multicollinearity
## 
## R-square of y on all x: 0.4936 
## 
## * use method argument to check which regressors may be the reason of collinearity
## ===================================
# Eigenvalues, condition indices (CI) and variance proportions for `model`
eigprop(model)
## 
## Call:
## eigprop(mod = model)
## 
##   Eigenvalues      CI (Intercept)   mass glucose insulin pressure triceps
## 1      6.9094  1.0000      0.0004 0.0004  0.0009  0.0037   0.0005  0.0013
## 2      0.4369  3.9767      0.0002 0.0005  0.0006  0.0130   0.0000  0.0009
## 3      0.2780  4.9855      0.0001 0.0003  0.0025  0.2274   0.0003  0.0009
## 4      0.2315  5.4637      0.0052 0.0067  0.0002  0.5165   0.0073  0.0276
## 5      0.0794  9.3277      0.0304 0.0025  0.0569  0.0247   0.0357  0.5227
## 6      0.0342 14.2157      0.0289 0.0370  0.8977  0.1962   0.0862  0.0419
## 7      0.0175 19.8656      0.0020 0.7576  0.0046  0.0101   0.4315  0.3193
## 8      0.0131 22.9682      0.9328 0.1950  0.0367  0.0084   0.4384  0.0854
##   pedigree pregnant
## 1   0.0048   0.0056
## 2   0.0543   0.8728
## 3   0.7358   0.0257
## 4   0.1845   0.0450
## 5   0.0038   0.0084
## 6   0.0039   0.0006
## 7   0.0097   0.0402
## 8   0.0031   0.0018
## 
## ===============================
## Row 7==> mass, proportion 0.757614 >= 0.50 
## Row 6==> glucose, proportion 0.897676 >= 0.50 
## Row 4==> insulin, proportion 0.516464 >= 0.50 
## Row 5==> triceps, proportion 0.522724 >= 0.50 
## Row 3==> pedigree, proportion 0.735846 >= 0.50 
## Row 2==> pregnant, proportion 0.872805 >= 0.50

Regresyon Analizleri

#Bundan sonraki analizlerime age, mass, insulin değişkenleri ile devam edeceğim

library(broom)
# Pearson correlation test between age and mass, shown as a tidy table.
# `digits` is spelled out in full instead of relying on partial matching.
cor_1 <- cor.test(~ age + mass, data = veri)
tidy(cor_1) %>% kable(digits = 3)
estimate statistic p.value parameter conf.low conf.high method alternative
0.073 1.695 0.091 530 -0.012 0.157 Pearson’s product-moment correlation two.sided
# Pearson correlation test between age and insulin, shown as a tidy table.
# `digits` is spelled out in full instead of relying on partial matching.
cor_1 <- cor.test(~ age + insulin, data = veri)
tidy(cor_1) %>% kable(digits = 3)
estimate statistic p.value parameter conf.low conf.high method alternative
0.176 4.124 0 530 0.093 0.257 Pearson’s product-moment correlation two.sided
# Pearson correlation test between mass and insulin, shown as a tidy table.
# `digits` is spelled out in full instead of relying on partial matching.
cor_1 <- cor.test(~ mass + insulin, data = veri)
tidy(cor_1) %>% kable(digits = 3)
estimate statistic p.value parameter conf.low conf.high method alternative
0.199 4.663 0 530 0.115 0.279 Pearson’s product-moment correlation two.sided
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Pairwise plot matrix restricted to the three variables kept for the analysis
ggpairs(veri[, c("age", "mass", "insulin")])

library(scatterplot3d)
# 3D scatter plot of age, mass and insulin
scatterplot3d(
  veri[c("age", "mass", "insulin")],
  color = "steelblue",
  pch = 16,
  angle = 75
)

# Same data without the surrounding box, drawn with type = "h"
scatterplot3d(
  veri[c("age", "mass", "insulin")],
  color = "steelblue",
  pch = 16,
  angle = 75,
  type = "h",
  box = FALSE
)

library(rgl)
# 3D plot of age, mass and insulin with rgl, embedded as a widget
plot3d(
  x = veri$age, y = veri$mass, z = veri$insulin,
  type = "s", size = 1.5, col = "red",
  xlab = "age", ylab = "mass", zlab = "insulin"
)
rglwidget()