HC_10

x <- list(a = 1:5, b = rnorm(10))
lapply(x, mean)

## $a
## [1] 3
## 
## $b
## [1] 0.2065104

sapply(x, mean)

##         a         b 
## 3.0000000 0.2065104

x1 <- list( a= c(1:5,NA),
            b= rnorm(10))
lapply(x1, mean, na.rm=TRUE)

## $a
## [1] 3
## 
## $b
## [1] 0.2940407

x <- list(a = 1:4, b = rnorm(10), c = rnorm(20, 1), d = rnorm(100, 5))
lapply(x, mean)

## $a
## [1] 2.5
## 
## $b
## [1] -0.03896723
## 
## $c
## [1] 1.361732
## 
## $d
## [1] 4.930519

x <- 1:4
lapply(x, runif)

## [[1]]
## [1] 0.2584043
## 
## [[2]]
## [1] 0.5565635 0.1654728
## 
## [[3]]
## [1] 0.1260730 0.7567073 0.2995682
## 
## [[4]]
## [1] 0.4429644 0.5854476 0.5840174 0.7560373

x <- 1:4
lapply(x, runif, min = 0, max = 10)

## [[1]]
## [1] 0.8255283
## 
## [[2]]
## [1] 5.372480 5.305196
## 
## [[3]]
## [1] 4.51547459 6.94614289 0.04974884
## 
## [[4]]
## [1] 2.3299425 7.5564161 7.8531321 0.7896624

x <- list(a = matrix(1:4, 2, 2), b = matrix(1:6, 3, 2))

lapply(x, function(elt) { elt[,1] })

## $a
## [1] 1 2
## 
## $b
## [1] 1 2 3

lapply(x1, function(emre){emre*0.75})

## $a
## [1] 0.75 1.50 2.25 3.00 3.75   NA
## 
## $b
##  [1] -1.0878211  0.3877561 -0.6389411  0.6539283  1.2427780 -0.6454784
##  [7]  0.2964486  0.5849739  0.7070885  0.7045725

x <- list(a = 1:4, b = rnorm(10), c = rnorm(20, 1), d = rnorm(100, 5))
lapply(x, mean)

## $a
## [1] 2.5
## 
## $b
## [1] 0.2175888
## 
## $c
## [1] 1.08462
## 
## $d
## [1] 5.017358

sapply(x, mean)

##         a         b         c         d 
## 2.5000000 0.2175888 1.0846200 5.0173580

# Split a numeric vector into groups defined by a factor.
x <- c(rnorm(10), runif(10), rnorm(10, 5)) # 30-element vector (three draws of 10 values each)
f <- gl(3, 10) # factor with 3 levels, 10 values per level (the grouping / independent variable)
split(x, f)

## $`1`
##  [1]  0.09217836 -0.22006110 -1.59456258  0.28222376 -0.88551978 -0.01896593
##  [7]  0.45715737  0.20407362 -0.02152475  0.28548222
## 
## $`2`
##  [1] 0.260272064 0.440674181 0.879722366 0.007292072 0.573706792 0.398069528
##  [7] 0.353848374 0.954055202 0.520953819 0.656104981
## 
## $`3`
##  [1] 4.694031 6.451051 3.774135 2.967040 3.923932 5.157548 5.700786 4.465467
##  [9] 4.093606 4.777955

lapply(split(x, f), mean)

## $`1`
## [1] -0.1419519
## 
## $`2`
## [1] 0.5044699
## 
## $`3`
## [1] 4.600555

library(datasets)
head(airquality)

##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

s <- split(airquality, airquality$Month)
str(s)

## List of 5
##  $ 5:'data.frame':   31 obs. of  6 variables:
##   ..$ Ozone  : int [1:31] 41 36 12 18 NA 28 23 19 8 NA ...
##   ..$ Solar.R: int [1:31] 190 118 149 313 NA NA 299 99 19 194 ...
##   ..$ Wind   : num [1:31] 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##   ..$ Temp   : int [1:31] 67 72 74 62 56 66 65 59 61 69 ...
##   ..$ Month  : int [1:31] 5 5 5 5 5 5 5 5 5 5 ...
##   ..$ Day    : int [1:31] 1 2 3 4 5 6 7 8 9 10 ...
##  $ 6:'data.frame':   30 obs. of  6 variables:
##   ..$ Ozone  : int [1:30] NA NA NA NA NA NA 29 NA 71 39 ...
##   ..$ Solar.R: int [1:30] 286 287 242 186 220 264 127 273 291 323 ...
##   ..$ Wind   : num [1:30] 8.6 9.7 16.1 9.2 8.6 14.3 9.7 6.9 13.8 11.5 ...
##   ..$ Temp   : int [1:30] 78 74 67 84 85 79 82 87 90 87 ...
##   ..$ Month  : int [1:30] 6 6 6 6 6 6 6 6 6 6 ...
##   ..$ Day    : int [1:30] 1 2 3 4 5 6 7 8 9 10 ...
##  $ 7:'data.frame':   31 obs. of  6 variables:
##   ..$ Ozone  : int [1:31] 135 49 32 NA 64 40 77 97 97 85 ...
##   ..$ Solar.R: int [1:31] 269 248 236 101 175 314 276 267 272 175 ...
##   ..$ Wind   : num [1:31] 4.1 9.2 9.2 10.9 4.6 10.9 5.1 6.3 5.7 7.4 ...
##   ..$ Temp   : int [1:31] 84 85 81 84 83 83 88 92 92 89 ...
##   ..$ Month  : int [1:31] 7 7 7 7 7 7 7 7 7 7 ...
##   ..$ Day    : int [1:31] 1 2 3 4 5 6 7 8 9 10 ...
##  $ 8:'data.frame':   31 obs. of  6 variables:
##   ..$ Ozone  : int [1:31] 39 9 16 78 35 66 122 89 110 NA ...
##   ..$ Solar.R: int [1:31] 83 24 77 NA NA NA 255 229 207 222 ...
##   ..$ Wind   : num [1:31] 6.9 13.8 7.4 6.9 7.4 4.6 4 10.3 8 8.6 ...
##   ..$ Temp   : int [1:31] 81 81 82 86 85 87 89 90 90 92 ...
##   ..$ Month  : int [1:31] 8 8 8 8 8 8 8 8 8 8 ...
##   ..$ Day    : int [1:31] 1 2 3 4 5 6 7 8 9 10 ...
##  $ 9:'data.frame':   30 obs. of  6 variables:
##   ..$ Ozone  : int [1:30] 96 78 73 91 47 32 20 23 21 24 ...
##   ..$ Solar.R: int [1:30] 167 197 183 189 95 92 252 220 230 259 ...
##   ..$ Wind   : num [1:30] 6.9 5.1 2.8 4.6 7.4 15.5 10.9 10.3 10.9 9.7 ...
##   ..$ Temp   : int [1:30] 91 92 93 93 87 84 80 78 75 73 ...
##   ..$ Month  : int [1:30] 9 9 9 9 9 9 9 9 9 9 ...
##   ..$ Day    : int [1:30] 1 2 3 4 5 6 7 8 9 10 ...

# Column means of Ozone, Solar.R and Wind for every element of `s`.
# Note that an anonymous function is passed to lapply().
lapply(s, function(month_df) {
  measurements <- month_df[, c("Ozone", "Solar.R", "Wind")]
  colMeans(measurements)
})

## $`5`
##    Ozone  Solar.R     Wind 
##       NA       NA 11.62258 
## 
## $`6`
##     Ozone   Solar.R      Wind 
##        NA 190.16667  10.26667 
## 
## $`7`
##      Ozone    Solar.R       Wind 
##         NA 216.483871   8.941935 
## 
## $`8`
##    Ozone  Solar.R     Wind 
##       NA       NA 8.793548 
## 
## $`9`
##    Ozone  Solar.R     Wind 
##       NA 167.4333  10.1800

sapply(s, function(x) {
        colMeans(x[, c("Ozone", "Solar.R", "Wind")])
})

##                5         6          7        8        9
## Ozone         NA        NA         NA       NA       NA
## Solar.R       NA 190.16667 216.483871       NA 167.4333
## Wind    11.62258  10.26667   8.941935 8.793548  10.1800

sapply(s, function(x) {
        colMeans(x[, c("Ozone", "Solar.R", "Wind")], 
                 na.rm = TRUE)
})

##                 5         6          7          8         9
## Ozone    23.61538  29.44444  59.115385  59.961538  31.44828
## Solar.R 181.29630 190.16667 216.483871 171.857143 167.43333
## Wind     11.62258  10.26667   8.941935   8.793548  10.18000

library(datasets)
 head(airquality)

##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6

 lapply(airquality,mean, na.rm= TRUE)

## $Ozone
## [1] 42.12931
## 
## $Solar.R
## [1] 185.9315
## 
## $Wind
## [1] 9.957516
## 
## $Temp
## [1] 77.88235
## 
## $Month
## [1] 6.993464
## 
## $Day
## [1] 15.80392

 sapply(airquality,mean, na.rm= TRUE)

##      Ozone    Solar.R       Wind       Temp      Month        Day 
##  42.129310 185.931507   9.957516  77.882353   6.993464  15.803922

## generate data
x <- c(rnorm(10), runif(10), rnorm(10, 1))
## grouping factor
f <- gl(3, 10)   
f

##  [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3
## Levels: 1 2 3

tapply(x, f, mean)

##          1          2          3 
## -0.2598788  0.4447014  1.0283033

tapply(x, f, mean, simplify = FALSE)

## $`1`
## [1] -0.2598788
## 
## $`2`
## [1] 0.4447014
## 
## $`3`
## [1] 1.028303

tapply(x, f, range)

## $`1`
## [1] -1.622877  1.087143
## 
## $`2`
## [1] 0.04331139 0.83504364
## 
## $`3`
## [1] -0.1043797  2.5634304

# Example data for ten people: name (isim), height (boy), weight (kilo),
# sex (cinsiyet) and clothing size (beden).
isim <- c(
  "Ali", "Elif", "Su", "Deniz", "Aras",
  "Berk", "Can", "Ece", "Efe", "Arda"
)
boy <- c(160, 165, 170, 155, 167, 162, 169, 158, 160, 164)
kilo <- c(55, 55, 57, 50, 48, 65, 58, 62, 45, 47)

# The two categorical variables are stored as factors.
cinsiyet <- factor(c(
  "erkek", "kadin", "kadin", "kadin", "erkek",
  "erkek", "erkek", "kadin", "erkek", "erkek"
))
beden <- factor(c("S", "M", "S", "M", "S", "L", "M", "L", "S", "S"))

# The same five variables, once as a list and once as a data frame.
Liste <- list(
  isim = isim, boy = boy, cinsiyet = cinsiyet, beden = beden, kilo = kilo
)
df <- data.frame(
  isim = isim, boy = boy, cinsiyet = cinsiyet, beden = beden, kilo = kilo
)

# Applying tapply() to the list data structure: heights sorted within each sex.
tapply(Liste$boy, Liste$cinsiyet, sort)

## $erkek
## [1] 160 160 162 164 167 169
## 
## $kadin
## [1] 155 158 165 170

tapply(Liste$boy, Liste$cinsiyet, sort, decreasing=TRUE)

## $erkek
## [1] 169 167 164 162 160 160
## 
## $kadin
## [1] 170 165 158 155

tapply(df$boy, Liste$cinsiyet, sort)

## $erkek
## [1] 160 160 162 164 167 169
## 
## $kadin
## [1] 155 158 165 170

tapply(df$boy, Liste$cinsiyet, mean)

##    erkek    kadin 
## 163.6667 162.0000

tapply(df$boy, Liste$cinsiyet, sort, decreasing=TRUE)

## $erkek
## [1] 169 167 164 162 160 160
## 
## $kadin
## [1] 170 165 158 155

by(df$boy, Liste$cinsiyet, sort)

## Liste$cinsiyet: erkek
## [1] 160 160 162 164 167 169
## ------------------------------------------------------------ 
## Liste$cinsiyet: kadin
## [1] 155 158 165 170

by(df$boy, Liste$cinsiyet, sort, decreasing=TRUE)

## Liste$cinsiyet: erkek
## [1] 169 167 164 162 160 160
## ------------------------------------------------------------ 
## Liste$cinsiyet: kadin
## [1] 170 165 158 155

by(df$boy, Liste$cinsiyet, mean)

## Liste$cinsiyet: erkek
## [1] 163.6667
## ------------------------------------------------------------ 
## Liste$cinsiyet: kadin
## [1] 162

## 20 x 10 matrix filled with 200 draws from rnorm()
x <- matrix(rnorm(200), 20, 10)
apply(x, 2, mean)  ## Take the mean of each column (MARGIN = 2)

##  [1]  0.0229302103  0.2253763312  0.1144782597 -0.4328472762 -0.0005140581
##  [6] -0.1046179842 -0.4014810255 -0.0318485021 -0.2537872669  0.0512321573

apply(x, 1, sum)   ## Take the sum of each row (MARGIN = 1); this is a sum, not a mean

##  [1]  4.5966666 -0.5384966 -4.5171906 -1.5917761 -1.3888403 -1.9147264
##  [7] -2.0905864  5.0469212 -5.0389567 -5.7016294  2.2033481  1.0811123
## [13] -3.2616023 -2.0125029 -4.2721438  3.0069081  0.1410559  2.5829376
## [19] -1.2009040 -1.3511772

# Reproducible example: 100 values resampled (with replacement) from
# 10000 normal draws (mean 50, sd 5), arranged as a 20 x 5 matrix.
set.seed(12)
S1 <- sample(
  x = rnorm(n = 10000, mean = 50, sd = 5),
  size = 100,
  replace = TRUE
)
Matris1 <- matrix(data = S1, nrow = 20, ncol = 5)

# MARGIN = 2 applies the function over the columns of the matrix.
apply(X = Matris1, MARGIN = 2, FUN = mean)

## [1] 48.20485 52.13701 49.38658 50.61689 48.60479

apply(Matris1, 2, summary)

##             [,1]     [,2]     [,3]     [,4]     [,5]
## Min.    39.00080 40.23309 39.04749 39.32974 37.74364
## 1st Qu. 45.21933 48.44165 45.57123 47.36401 43.71252
## Median  49.31295 52.24410 49.49029 51.08794 47.62144
## Mean    48.20485 52.13701 49.38658 50.61689 48.60479
## 3rd Qu. 52.40540 55.97719 52.70180 54.36235 53.32016
## Max.    55.24910 63.33272 58.88203 59.93019 60.51715

apply(Matris1, 1, summary)

##             [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
## Min.    45.82396 39.16789 51.63544 40.23309 39.04749 44.81304 39.73637 51.11418
## 1st Qu. 47.78055 39.32974 52.46878 43.82775 47.16408 47.46234 46.19462 51.96290
## Median  48.36804 46.24689 53.43269 47.65095 49.56534 49.64774 49.12984 52.65739
## Mean    50.47126 45.82933 54.50679 47.52181 48.65629 52.22224 50.10067 54.92558
## 3rd Qu. 54.95931 51.70256 56.11501 49.31343 52.65050 59.25790 55.94640 55.56069
## Max.    55.42443 52.69959 58.88203 56.58380 54.85404 59.93019 59.49613 63.33272
##             [,9]    [,10]    [,11]    [,12]    [,13]    [,14]    [,15]    [,16]
## Min.    44.96852 39.00080 43.36682 48.42947 42.13211 42.73818 40.55680 41.37856
## 1st Qu. 48.34900 48.83882 52.38428 50.17014 48.46619 46.50319 43.21988 42.18138
## Median  52.21976 53.65437 52.38428 51.40809 48.88713 50.98943 45.46715 47.83169
## Mean    50.61489 50.35382 51.18599 53.07152 50.44334 49.60777 46.24742 47.67032
## 3rd Qu. 53.31388 54.20555 52.91266 54.83276 55.24910 51.36429 46.82348 50.54044
## Max.    54.22331 56.06955 54.88190 60.51715 57.48218 56.44375 55.16980 56.41952
##            [,17]    [,18]    [,19]    [,20]
## Min.    40.53528 40.55680 37.74364 46.71473
## 1st Qu. 46.04637 44.03153 47.73063 49.31247
## Median  47.98124 44.46635 49.30321 51.96828
## Mean    48.55872 45.45876 47.52113 50.83282
## 3rd Qu. 49.85073 46.40143 50.16318 52.82962
## Max.    58.37998 51.83770 52.66500 53.33901

# Relative variability (coefficient of variation) of `x`, in percent:
# sd(x) / mean(x) * 100.
#
# Arguments:
#   x      numeric vector.
#   na.rm  if TRUE, missing values are dropped before sd() and mean() are
#          computed. The default FALSE keeps the previous behaviour, where
#          any NA in `x` makes the result NA.
#
# Returns a single numeric value.
bagil_degiskenlik <- function(x, na.rm = FALSE) {
  (sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm)) * 100
}
apply(Matris1, 2, bagil_degiskenlik)

## [1] 11.24914 10.05771 11.02709 10.59998 12.97312

apply(Matris1, 2, function(x){(sd(x)/mean(x))*100})

## [1] 11.24914 10.05771 11.02709 10.59998 12.97312

 mapply(rep, 1:4, 4:1)

## [[1]]
## [1] 1 1 1 1
## 
## [[2]]
## [1] 2 2 2
## 
## [[3]]
## [1] 3 3
## 
## [[4]]
## [1] 4

# Thin wrapper around rnorm(): draws `n` values from a normal distribution
# with the given `mean` and `sd`.
noise <- function(n, mean, sd) {
  stats::rnorm(n = n, mean = mean, sd = sd)
}

## 5 random numbers
noise(5, 1, 2)

## [1] -4.327419  1.768021  1.886192  1.184867  3.169347

mapply(noise, 1:5, 1:5, 2)

## [[1]]
## [1] -0.7104655
## 
## [[2]]
## [1] 5.069163 2.364621
## 
## [[3]]
## [1] 5.224162 4.429571 5.242948
## 
## [[4]]
## [1] 8.278157 6.074560 3.496312 1.782874
## 
## [[5]]
## [1] 3.7470981 4.2376843 9.4055567 0.9957188 5.8647980

list(noise(1, 1, 2), noise(2, 2, 2),
     noise(3, 3, 2), noise(4, 4, 2),
     noise(5, 5, 2))

## [[1]]
## [1] -1.039076
## 
## [[2]]
## [1] 0.3540303 2.8484079
## 
## [[3]]
## [1] 2.250261 2.707497 2.360474
## 
## [[4]]
## [1] 2.837683 5.003308 2.944540 2.339083
## 
## [[5]]
## [1] 6.066594 4.941666 5.764200 5.716390 6.835816

# Sum of squared deviations of `x` from `mu`, each deviation divided by
# `sigma` before squaring. Returns a single number.
sumsq <- function(mu, sigma, x) {
  z <- (x - mu) / sigma
  sum(z^2)
}

x <- rnorm(100)       ## generate data
sumsq(1:10, 1:10, x)  ## not what we want: mu and sigma are recycled along x, giving one number

## [1] 119.891

mapply(sumsq, 1:10, 1:10, MoreArgs = list(x = x))

##  [1] 236.5110 143.2120 123.2428 115.3451 111.2744 108.8388 107.2354 106.1073
##  [9] 105.2742 104.6354

set.seed(10)
v1 <- rnorm(10,0,1)
# Standardized sum of squares: subtract `mu` from `x`, divide by `sigma`,
# square, and add everything up.
sumsq <- function(mu, sigma, x) {
  deviations <- x - mu
  sum((deviations / sigma)^2)
}
sumsq(0,1,v1)

## [1] 6.8161

sumsq(mean(v1), sd(v1),v1)

## [1] 9

set.seed(987)
std <- rnorm(n=300,50,10)
hist(std)

pnorm(1)

## [1] 0.8413447

pnorm(-1:1)

## [1] 0.1586553 0.5000000 0.8413447

qnorm(c(0.025, 0.975))  # 95% confidence interval (2.5% and 97.5% standard-normal quantiles)

## [1] -1.959964  1.959964

round(qnorm(c(0.005, 0.995)), 2)  # 99% confidence interval (0.5% and 99.5% quantiles, rounded to 2 digits)

## [1] -2.58  2.58

rnorm(10, mean = 0, sd = 1)

##  [1]  0.71883622  1.33501467  1.58250235  0.34783644  0.84599059 -1.27280039
##  [7] -0.64053682 -0.07203593 -0.72280017 -0.65800036

# Binomial probabilities for 0, 1 or 2 successes in two trials with success
# probability 0.5. The `prob` argument is spelled out in full instead of
# relying on partial matching of `p`.
x <- 0:2
plot(x, dbinom(x, size = 2, prob = 0.5),
     type = "h", col = "red",
     lwd = 10,
     main = "Bozuk Parayı İki Kere Havaya Atma")

# The same distribution for 30 trials.
x <- 0:30
plot(x, dbinom(x, size = 30, prob = 0.5),
     type = "h",
     main="n=30 için Binom Dağılımı")

x <- seq(-4, 4, 0.01)
plot(x, dnorm(x), 
     type = "l", 
     main = "Normal Dağılım Yoğunluk Fonksiyonu")

plot(x, pnorm(x), type = "l",
     main = "Normal Dağılım Yığılım Fonksiyonu")

IQ <- seq(55, 145, 0.1)
plot(IQ, dnorm(IQ, mean = 100, sd = 15), type = "l")
abline(v = 100, col = "red")

# 1000 sample means, each computed from 3 normal draws (mean 100, sd 15).
sample_means <- vapply(
  seq_len(1000),
  function(i) mean(rnorm(n = 3, mean = 100, sd = 15)),
  numeric(1)
)
hist(sample_means,
     col = "skyblue",
     main = "3 Kişilik Örneklem Ortalamaları")

# The same simulation with 30 draws per sample.
sample_means <- vapply(
  seq_len(1000),
  function(i) mean(rnorm(n = 30, mean = 100, sd = 15)),
  numeric(1)
)
hist(sample_means,
     col = "lightgreen",
     main = "30 Kişilik Örneklem Ortalamaları")

# Sampling distribution of the mean for draws from a uniform distribution on
# [0, 100], simulated for a small (nsmall) and a large (nlarge) sample size.
nreps <- 10000
nsmall <- 2
nlarge <- 30

# Draw the four plots in a 2 x 2 grid. par() returns the previous settings,
# which are restored at the end so later plots are not forced into the grid.
old_par <- par(mfrow = c(2, 2))

# Small sample size: one sample mean per replication.
# vapply() over seq_len() replaces the index loop; it is safe for nreps = 0
# and no longer creates a global variable named `sample` that masks
# base::sample().
sampdist.mean.small <- vapply(
  seq_len(nreps),
  function(i) mean(runif(nsmall, 0, 100)),
  numeric(1)
)

# Histogram
hist(sampdist.mean.small, 
     main = "Küçük Örneklem Ortalamalarının \n Dağılımı (n = 2)",
     xlab = "Örneklem Ortalamaları",
     col = "lightblue")

# Q-Q plot
qqnorm(sampdist.mean.small, 
       main = "Q-Q Grafiği: Küçük Örneklem (n = 2)")
qqline(sampdist.mean.small, col = "red")

# Large sample size: one sample mean per replication.
sampdist.mean.large <- vapply(
  seq_len(nreps),
  function(i) mean(runif(nlarge, 0, 100)),
  numeric(1)
)

# Histogram
hist(sampdist.mean.large, 
     main = "Büyük Örneklem Ortalamalarının \n Dağılımı (n = 30)",
     xlab = "Örneklem Ortalamaları",
     col = "lightgreen")

# Q-Q plot
qqnorm(sampdist.mean.large, 
       main = "Q-Q Grafiği: Büyük Örneklem (n = 30)")
qqline(sampdist.mean.large, col = "blue")

# Restore the graphics layout that was active before this section.
par(old_par)

uniform_samples <- runif(1000, min = 0, max = 10)
hist(uniform_samples, 
     main = "Uniform Dağılım", col = "orange")

curve(dt(x, df = 10), 
      from = -4, to = 4, main = "t Dağılımı")

x <- 0:10
plot(x, dpois(x, lambda = 3), 
     type = "h", main = "Poisson Dağılımı")

curve(dchisq(x, df = 5), from = 0, to = 20, 
      main = "Ki-Kare Dağılımı")

HC_10

Hamit ÇÖKERDENOĞLU

2024-12-10