# apply ailesi, for ve while döngülerinin alternatifidir ve çoğu zaman daha hızlı çalışır:
# apply satır/sütun seçtirir, lapply listeler üzerinde çalışır,
# mapply ise apply ailesinin çok değişkenli sürümüdür.
x <- list(a = 1:5, b = rnorm(10))
lapply(x, mean)
## $a
## [1] 3
##
## $b
## [1] 0.1049172
sapply(x,mean)
## a b
## 3.0000000 0.1049172
# A list whose first element contains a missing value.
x1 <- list(a = c(1:5, NA), b = rnorm(10))
# Arguments after FUN are forwarded to it. TRUE is spelled out because `T`
# is an ordinary variable that can be reassigned.
lapply(x1, mean, na.rm = TRUE)
## $a
## [1] 3
##
## $b
## [1] 0.0650278
x <- list(a = 1:4, b = rnorm(10), c = rnorm(20, 1), d = rnorm(100, 5))
lapply(x, mean)
## $a
## [1] 2.5
##
## $b
## [1] -0.1814151
##
## $c
## [1] 1.472409
##
## $d
## [1] 5.129652
x <- 1:4
lapply(x, runif)
## [[1]]
## [1] 0.02353275
##
## [[2]]
## [1] 0.8950128 0.2245127
##
## [[3]]
## [1] 0.5878939 0.3158162 0.5634900
##
## [[4]]
## [1] 0.04060321 0.38988073 0.20732812 0.99165710
x <- 1:4
lapply(x, runif, min = 0, max = 10)
## [[1]]
## [1] 3.136077
##
## [[2]]
## [1] 4.560684 9.103325
##
## [[3]]
## [1] 1.4572757 0.1372734 2.7204905
##
## [[4]]
## [1] 5.4668820 0.2594048 7.0372107 1.2581190
x <- list(a = matrix(1:4, 2, 2), b = matrix(1:6, 3, 2))
x
## $a
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
##
## $b
## [,1] [,2]
## [1,] 1 4
## [2,] 2 5
## [3,] 3 6
lapply(x, function(elt) {
elt[,1] })
## $a
## [1] 1 2
##
## $b
## [1] 1 2 3
## isimsiz deneme ders sirasi
lapply(x, function(emre){
emre*0.75
})
## $a
## [,1] [,2]
## [1,] 0.75 2.25
## [2,] 1.50 3.00
##
## $b
## [,1] [,2]
## [1,] 0.75 3.00
## [2,] 1.50 3.75
## [3,] 2.25 4.50
x <- list(a = 1:4, b = rnorm(10), c = rnorm(20, 1), d = rnorm(100, 5))
lapply(x, mean)
## $a
## [1] 2.5
##
## $b
## [1] 0.418248
##
## $c
## [1] 1.013773
##
## $d
## [1] 4.964033
sapply(x,mean)
## a b c d
## 2.500000 0.418248 1.013773 4.964033
x <- c(rnorm(10), runif(10), rnorm(10, 5)) # 30-element vector
f <- gl(3, 10) # grouping factor: 3 levels, 10 consecutive values each
split(x, f)
## $`1`
## [1] 0.53315722 1.38792272 0.49489709 -0.25661998 2.50487874 1.47471663
## [7] -0.91050752 -1.39646667 -0.01288247 0.37618828
##
## $`2`
## [1] 0.20126414 0.37267344 0.62036008 0.77539084 0.46335481 0.55151657
## [7] 0.30698995 0.61709063 0.66287111 0.06985639
##
## $`3`
## [1] 4.216555 3.988690 5.058337 4.947725 4.919567 6.253618 5.475024 4.210617
## [9] 5.330289 4.305454
lapply(split(x, f), mean)
## $`1`
## [1] 0.4195284
##
## $`2`
## [1] 0.4641368
##
## $`3`
## [1] 4.870588
library(datasets)
head(airquality)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
s <- split(airquality, airquality$Month)
str(s)
## List of 5
## $ 5:'data.frame': 31 obs. of 6 variables:
## ..$ Ozone : int [1:31] 41 36 12 18 NA 28 23 19 8 NA ...
## ..$ Solar.R: int [1:31] 190 118 149 313 NA NA 299 99 19 194 ...
## ..$ Wind : num [1:31] 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## ..$ Temp : int [1:31] 67 72 74 62 56 66 65 59 61 69 ...
## ..$ Month : int [1:31] 5 5 5 5 5 5 5 5 5 5 ...
## ..$ Day : int [1:31] 1 2 3 4 5 6 7 8 9 10 ...
## $ 6:'data.frame': 30 obs. of 6 variables:
## ..$ Ozone : int [1:30] NA NA NA NA NA NA 29 NA 71 39 ...
## ..$ Solar.R: int [1:30] 286 287 242 186 220 264 127 273 291 323 ...
## ..$ Wind : num [1:30] 8.6 9.7 16.1 9.2 8.6 14.3 9.7 6.9 13.8 11.5 ...
## ..$ Temp : int [1:30] 78 74 67 84 85 79 82 87 90 87 ...
## ..$ Month : int [1:30] 6 6 6 6 6 6 6 6 6 6 ...
## ..$ Day : int [1:30] 1 2 3 4 5 6 7 8 9 10 ...
## $ 7:'data.frame': 31 obs. of 6 variables:
## ..$ Ozone : int [1:31] 135 49 32 NA 64 40 77 97 97 85 ...
## ..$ Solar.R: int [1:31] 269 248 236 101 175 314 276 267 272 175 ...
## ..$ Wind : num [1:31] 4.1 9.2 9.2 10.9 4.6 10.9 5.1 6.3 5.7 7.4 ...
## ..$ Temp : int [1:31] 84 85 81 84 83 83 88 92 92 89 ...
## ..$ Month : int [1:31] 7 7 7 7 7 7 7 7 7 7 ...
## ..$ Day : int [1:31] 1 2 3 4 5 6 7 8 9 10 ...
## $ 8:'data.frame': 31 obs. of 6 variables:
## ..$ Ozone : int [1:31] 39 9 16 78 35 66 122 89 110 NA ...
## ..$ Solar.R: int [1:31] 83 24 77 NA NA NA 255 229 207 222 ...
## ..$ Wind : num [1:31] 6.9 13.8 7.4 6.9 7.4 4.6 4 10.3 8 8.6 ...
## ..$ Temp : int [1:31] 81 81 82 86 85 87 89 90 90 92 ...
## ..$ Month : int [1:31] 8 8 8 8 8 8 8 8 8 8 ...
## ..$ Day : int [1:31] 1 2 3 4 5 6 7 8 9 10 ...
## $ 9:'data.frame': 30 obs. of 6 variables:
## ..$ Ozone : int [1:30] 96 78 73 91 47 32 20 23 21 24 ...
## ..$ Solar.R: int [1:30] 167 197 183 189 95 92 252 220 230 259 ...
## ..$ Wind : num [1:30] 6.9 5.1 2.8 4.6 7.4 15.5 10.9 10.3 10.9 9.7 ...
## ..$ Temp : int [1:30] 91 92 93 93 87 84 80 78 75 73 ...
## ..$ Month : int [1:30] 9 9 9 9 9 9 9 9 9 9 ...
## ..$ Day : int [1:30] 1 2 3 4 5 6 7 8 9 10 ...
# Column means of Ozone, Solar.R and Wind for every element of s, ignoring
# missing values. Note the anonymous function: it is defined inline because
# it is only needed for this one call.
lapply(s, function(x) {
  colMeans(x[, c("Ozone", "Solar.R", "Wind")], na.rm = TRUE)
})
## $`5`
## Ozone Solar.R Wind
## 23.61538 181.29630 11.62258
##
## $`6`
## Ozone Solar.R Wind
## 29.44444 190.16667 10.26667
##
## $`7`
## Ozone Solar.R Wind
## 59.115385 216.483871 8.941935
##
## $`8`
## Ozone Solar.R Wind
## 59.961538 171.857143 8.793548
##
## $`9`
## Ozone Solar.R Wind
## 31.44828 167.43333 10.18000
# Same computation with sapply(), which simplifies the equal-length results
# into a matrix with one column per element of s.
sapply(s, function(x) {
  colMeans(x[, c("Ozone", "Solar.R", "Wind")], na.rm = TRUE)
})
## 5 6 7 8 9
## Ozone 23.61538 29.44444 59.115385 59.961538 31.44828
## Solar.R 181.29630 190.16667 216.483871 171.857143 167.43333
## Wind 11.62258 10.26667 8.941935 8.793548 10.18000
lapply(airquality,mean,na.rm=TRUE)
## $Ozone
## [1] 42.12931
##
## $Solar.R
## [1] 185.9315
##
## $Wind
## [1] 9.957516
##
## $Temp
## [1] 77.88235
##
## $Month
## [1] 6.993464
##
## $Day
## [1] 15.80392
# Column means as a named numeric vector; TRUE is spelled out (not `T`).
sapply(airquality, mean, na.rm = TRUE)
## Ozone Solar.R Wind Temp Month Day
## 42.129310 185.931507 9.957516 77.882353 6.993464 15.803922
str(tapply)
## function (X, INDEX, FUN = NULL, ..., default = NA, simplify = TRUE)
## veri üret
x <- c(rnorm(10), runif(10), rnorm(10, 1))
## factor değişken
f <- gl(3, 10)
f
## [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3
## Levels: 1 2 3
tapply(x, f, mean)
## 1 2 3
## 0.1427093 0.4712801 0.9041138
# simplify = FALSE keeps the per-group results as a list instead of a vector.
tapply(x, f, mean, simplify = FALSE)
## $`1`
## [1] 0.1427093
##
## $`2`
## [1] 0.4712801
##
## $`3`
## [1] 0.9041138
isim <- c("Ali","Elif","Su","Deniz","Aras","Berk","Can","Ece","Efe","Arda")
boy <- c(160,165,170,155,167,162,169,158,160,164)
kilo <- c(55,55,57,50,48,65,58,62,45,47)
cinsiyet <- c("erkek","kadin","kadin","kadin","erkek",
"erkek","erkek","kadin","erkek","erkek")
cinsiyet <- factor(cinsiyet)
beden <- c("S","M","S","M","S","L","M","L","S","S")
beden <- factor(beden)
# tapply() fonksiyonunun liste veri yapısına uygulanması
Liste <- list(isim=isim,boy=boy,cinsiyet=cinsiyet,beden=beden,kilo=kilo)
df <- data.frame(isim=isim,boy=boy,cinsiyet=cinsiyet,beden=beden,kilo=kilo)
tapply(Liste$boy, Liste$cinsiyet, sort)
## $erkek
## [1] 160 160 162 164 167 169
##
## $kadin
## [1] 155 158 165 170
tapply(Liste$boy, Liste$cinsiyet, sort, decreasing=TRUE)
## $erkek
## [1] 169 167 164 162 160 160
##
## $kadin
## [1] 170 165 158 155
tapply(df$boy, Liste$cinsiyet, sort)
## $erkek
## [1] 160 160 162 164 167 169
##
## $kadin
## [1] 155 158 165 170
tapply(df$boy,Liste$cinsiyet,mean)
## erkek kadin
## 163.6667 162.0000
tapply(df$boy, Liste$cinsiyet, sort, decreasing=TRUE)
## $erkek
## [1] 169 167 164 162 160 160
##
## $kadin
## [1] 170 165 158 155
## arada dashed çizgi var, daha okunaklı olabilir bu
by(df$boy,Liste$cinsiyet,sort)
## Liste$cinsiyet: erkek
## [1] 160 160 162 164 167 169
## ------------------------------------------------------------
## Liste$cinsiyet: kadin
## [1] 155 158 165 170
# Extra arguments are forwarded to sort(); TRUE is spelled out (not `T`).
by(df$boy, Liste$cinsiyet, sort, decreasing = TRUE)
## Liste$cinsiyet: erkek
## [1] 169 167 164 162 160 160
## ------------------------------------------------------------
## Liste$cinsiyet: kadin
## [1] 170 165 158 155
by(df$boy, Liste$cinsiyet, mean)
## Liste$cinsiyet: erkek
## [1] 163.6667
## ------------------------------------------------------------
## Liste$cinsiyet: kadin
## [1] 162
x <- matrix(rnorm(200), 20, 10)
apply(x, 2, mean) ## Her sütunun ortalamasını alın
## [1] -0.10636781 -0.42940978 -0.07395770 0.13699964 0.07798346 -0.11717144
## [7] 0.02028809 -0.06216322 0.27046109 0.05426741
apply(x, 1, sum) ## Sum of each row (MARGIN = 1 selects rows)
## [1] -2.38193674 -0.01412203 3.07213969 0.28932252 -1.16225266 3.16706698
## [7] 0.75964607 6.10744552 1.24465580 3.94629705 -1.00555629 -8.48024394
## [13] -3.78759847 1.48614820 -1.36964454 2.44446785 -3.64519966 -2.56631136
## [19] -0.10507388 -2.58065527
set.seed(12)
S1 <- sample(rnorm(10000, 50, 5), 100, replace=TRUE)
Matris1 <- matrix(S1, nrow=20, ncol=5)
# Coefficient of variation of x: the standard deviation expressed as a
# percentage of the mean.
bagil_degiskenlik <- function(x) {
  oran <- sd(x) / mean(x)
  oran * 100
}
apply(Matris1, 2, bagil_degiskenlik)
## [1] 11.24914 10.05771 11.02709 10.59998 12.97312
apply(Matris1, 2,
function(x){(sd(x)/mean(x))*100})
## [1] 11.24914 10.05771 11.02709 10.59998 12.97312
##simülasyona lazım olur
# vpf, k, cor, vb. vb çok iyi
mapply(rep, 1:4, 4:1)
## [[1]]
## [1] 1 1 1 1
##
## [[2]]
## [1] 2 2 2
##
## [[3]]
## [1] 3 3
##
## [[4]]
## [1] 4
# Draw n normal random numbers with the given mean and standard deviation.
noise <- function(n, mean, sd) {
  draws <- rnorm(n = n, mean = mean, sd = sd)
  draws
}
## 5 random sayı
noise(5, 1, 2)
## [1] -4.327419 1.768021 1.886192 1.184867 3.169347
mapply(noise, 1:5, 1:5, 2)
## [[1]]
## [1] -0.7104655
##
## [[2]]
## [1] 5.069163 2.364621
##
## [[3]]
## [1] 5.224162 4.429571 5.242948
##
## [[4]]
## [1] 8.278157 6.074560 3.496312 1.782874
##
## [[5]]
## [1] 3.7470981 4.2376843 9.4055567 0.9957188 5.8647980
list(noise(1, 1, 2), noise(2, 2, 2),
noise(3, 3, 2), noise(4, 4, 2),
noise(5, 5, 2))
## [[1]]
## [1] -1.039076
##
## [[2]]
## [1] 0.3540303 2.8484079
##
## [[3]]
## [1] 2.250261 2.707497 2.360474
##
## [[4]]
## [1] 2.837683 5.003308 2.944540 2.339083
##
## [[5]]
## [1] 6.066594 4.941666 5.764200 5.716390 6.835816
# Sum of squared standardized deviations of x from mu, measured in units of
# sigma.
sumsq <- function(mu, sigma, x) {
  z <- (x - mu) / sigma
  sum(z^2)
}
x <- rnorm(100) ## veri üret
sumsq(1:10, 1:10, x) ## İstediğimiz bu değil
## [1] 119.891
mapply(sumsq, 1:10, 1:10, MoreArgs = list(x = x))
## [1] 236.5110 143.2120 123.2428 115.3451 111.2744 108.8388 107.2354 106.1073
## [9] 105.2742 104.6354
vsumsq <- Vectorize(sumsq, c("mu", "sigma"))
vsumsq(1:10, 1:10, x)
## [1] 236.5110 143.2120 123.2428 115.3451 111.2744 108.8388 107.2354 106.1073
## [9] 105.2742 104.6354
set.seed(10)
v1 <- rnorm(10,mean = 0,sd = 1)
sumsq(mean(v1),sigma = sd(v1),x = v1)
## [1] 9
set.seed(59) #çünkü tekirdağ
std <- rnorm(300,50,sd = 10)
hist(std)
#cumulative p
pnorm(0)
## [1] 0.5
pnorm(1)
## [1] 0.8413447
pnorm(c(-1,0,1))
## [1] 0.1586553 0.5000000 0.8413447
pnorm(2)
## [1] 0.9772499
# qnorm() maps cumulative probabilities back to z values. The tail
# probabilities 0.025 and 0.975 bound the central 95% of the standard normal,
# so the two quantiles are symmetric around zero.
qnorm(c(0.025, 0.975))
## [1] -1.959964 1.959964
#qnorm(0.50) ile pnorm(0) birbirini doğrular
round(qnorm(c(0.005, 0.995)), 2) # %99 güven aralığı
## [1] -2.58 2.58
# Binomial probability mass function for two tosses of a fair coin.
# The probability argument is written out as `prob`; the short form `p` only
# works through partial argument matching.
x <- 0:2
plot(x, dbinom(x, size = 2, prob = 0.5),
     type = "h", col = "red",
     lwd = 10,
     main = "Bozuk Parayı İki Kere Havaya Atma")
# With n = 30 the binomial already shows the bell shape of the normal curve.
x <- 0:30
plot(x, dbinom(x, size = 30, prob = 0.5),
     type = "h",
     main = "n=30 için Binom Dağılımı")
#yoğunluk
x <- seq(-4, 4, 0.01)
plot(x, dnorm(x),
type = "l",
main = "Normal Dağılım Yoğunluk Fonksiyonu")
#yiğilma
plot(x, pnorm(x), type = "l",
main = "Normal Dağılım Yığılım Fonksiyonu")
# Central limit theorem demo: normal density on the IQ scale
# (mean 100, sd 15).
IQ <- seq(55, 145, 0.1)
plot(IQ, dnorm(IQ, mean = 100, sd = 15), type = "l")
# Base graphics are not combined with `+` (that is ggplot2 syntax, and here
# it only evaluated NULL + NULL and printed integer(0)). abline() is a
# separate call that draws a vertical line at the mean on the current plot.
abline(v = 100, col = "red")
# Üç kişilik örneklem ortalamaları
sample_means <- replicate(1000,
mean(rnorm(3,
mean = 100,
sd = 15)))
hist(sample_means,
main = "3 Kişilik Örneklem Ortalamaları",
col = "skyblue")
# Otuz kişilik örneklem ortalamaları
sample_means <- replicate(1000,
mean(rnorm(30, mean = 100, sd = 15)))
hist(sample_means,
main = "30 Kişilik Örneklem Ortalamaları",
col = "lightgreen")
nreps <- 10000
nsmall <- 2
nlarge <- 30
# Show the four plots in a 2 x 2 grid. par() returns the previous settings,
# so the layout can be restored after the last panel is drawn.
eski_par <- par(mfrow = c(2, 2))
# Small sample size: means of nreps samples of nsmall Uniform(0, 100) draws.
# vapply() builds the result vector in one step, uses seq_len() so nreps = 0
# is safe, and avoids a variable called `sample` that would mask
# base::sample().
sampdist.mean.small <- vapply(
  seq_len(nreps),
  function(i) mean(runif(nsmall, 0, 100)),
  numeric(1)
)
# Histogram
hist(sampdist.mean.small,
     main = "Küçük Örneklem Ortalamalarının \n Dağılımı (n = 2)",
     xlab = "Örneklem Ortalamaları",
     col = "lightblue")
# Q-Q plot
qqnorm(sampdist.mean.small,
       main = "Q-Q Grafiği: Küçük Örneklem (n = 2)")
#qqline(sampdist.mean.small, col = "red")
# Large sample size: same simulation with nlarge draws per sample.
sampdist.mean.large <- vapply(
  seq_len(nreps),
  function(i) mean(runif(nlarge, 0, 100)),
  numeric(1)
)
# Histogram
hist(sampdist.mean.large,
     main = "Büyük Örneklem Ortalamalarının \n Dağılımı (n = 30)",
     xlab = "Örneklem Ortalamaları",
     col = "lightgreen")
# Q-Q plot
qqnorm(sampdist.mean.large,
       main = "Q-Q Grafiği: Büyük Örneklem (n = 30)")
#qqline(sampdist.mean.large, col = "blue")
# Restore the plotting layout so later figures are not squeezed into the grid.
par(eski_par)
uniform_samples <- runif(1000, min = 0, max = 10)
hist(uniform_samples,
main = "Uniform Dağılım", col = "orange")
# t Dağılımı
curve(dt(x, df = 100),
from = -4, to = 4, main = "t Dağılımı")
# Poisson
x <- 0:15
plot(x, dpois(x, lambda = 3),
type = "h", main = "Poisson Dağılımı")
# Ki-kare
curve(dchisq(x, df = 5), from = 0, to = 20,
main = "Ki-Kare Dağılımı")
# Example 1: one-sample z test.
# The population standard deviation is stored as `sigma`, not `sd`, so the
# stats::sd() function is not masked by a number.
sigma <- 4
M <- 13
X <- 16.5
n <- 16
sd_hata <- sigma / sqrt(n) # standard error of the mean
z <- (X - M) / sd_hata
z
## [1] 3.5
qnorm(c(0.025, 0.975)) # critical values
## [1] -1.959964 1.959964
# Example 2
M <- 80
sigma <- 10
n <- 25
X <- 81
sd_hata <- sigma / sqrt(n)
z <- (X - M) / sd_hata
z
## [1] 0.5
qnorm(c(0.025, 0.975)) # critical values
## [1] -1.959964 1.959964
# Same data with a larger sample: the standard error shrinks, so z grows.
n <- 400
sd_hata <- sigma / sqrt(n)
z <- (X - M) / sd_hata
z
## [1] 2
qnorm(c(0.025, 0.975))
## [1] -1.959964 1.959964