# Small sample: 100 draws from a normal distribution with mean 50 and
# standard deviation 5, compared with the theoretical density (red curve).
n <- 100
mostra <- rnorm(n, mean = 50, sd = 5)
hist(mostra, breaks = "scott", freq = FALSE)
curve(dnorm(x, mean = 50, sd = 5), col = "red", add = TRUE)
# Sample mean and standard deviation; the `##` lines are the recorded output
# of one run and will differ between runs unless a seed is fixed.
mean(mostra)
## [1] 49.83781
sd(mostra)
## [1] 4.343212
# Normal quantile-quantile plot of the small sample.
qqnorm(mostra)

# Large sample: one million draws from the same distribution, so the
# histogram and the sample moments sit much closer to the theoretical values.
n <- 1e6
mostra <- rnorm(n, mean = 50, sd = 5)
hist(mostra, breaks = "scott", freq = FALSE)
curve(dnorm(x, mean = 50, sd = 5), col = "red", add = TRUE)
mean(mostra)
## [1] 49.99809
sd(mostra)
## [1] 4.992202
# Sampling distribution of the mean and of the variance for normal samples of
# size n = 10. `mostra` holds N independent samples, one per row.
N <- 1e4
n <- 10
mostra <- matrix(rnorm(N * n, 50, 5), ncol = n)
# rowMeans() is the vectorised equivalent of apply(mostra, 1, mean) and is
# considerably faster on large matrices.
mitjanes <- rowMeans(mostra)
# Theoretical standard deviation of the sample mean: sigma / sqrt(n).
(sdm <- 5 / sqrt(n))
## [1] 1.581139
hist(mitjanes, breaks = "scott", freq = FALSE)
curve(dnorm(x, 50, sdm), add = TRUE, col = "red")
# Empirical mean and standard deviation of the N sample means (recorded
# output of one run; values vary between runs unless a seed is fixed).
mean(mitjanes)
## [1] 50.01396
sd(mitjanes)
## [1] 1.554832
qqnorm(mitjanes)
qqline(mitjanes)
# Sample variance of every row. For normal data (n - 1) * S^2 / sigma^2 is
# chi-square with n - 1 degrees of freedom, so the density of S^2 at x is
# dchisq(x * (n - 1) / sigma^2, n - 1) * (n - 1) / sigma^2, with sigma = 5.
vars <- apply(mostra, 1, var)
hist(vars, breaks = "scott", freq = FALSE)
curve(dchisq(x / 5^2 * (n - 1), n - 1) / 25 * (n - 1), add = TRUE, col = "red")
# Q-Q plot of the variances against chi-square(n - 1) quantiles.
qqplot(qchisq(ppoints(500), n - 1), vars)
# c) Genereu mostres de mida 1000 i torneu a observar com es distribueix la seva mitjana i la seva variància.
# Sampling distribution of the mean and of the variance for normal samples of
# size n = 1000. `mostra` holds N independent samples, one per row.
N <- 1e4
n <- 1000
mostra <- matrix(rnorm(N * n, 50, 5), ncol = n)
# rowMeans() is the vectorised equivalent of apply(mostra, 1, mean); on this
# 1e4 x 1000 matrix it avoids calling mean() once per row from R.
mitjanes <- rowMeans(mostra)
# Theoretical standard deviation of the sample mean: sigma / sqrt(n).
(sdm <- 5 / sqrt(n))
## [1] 0.1581139
hist(mitjanes, breaks = "scott", freq = FALSE)
curve(dnorm(x, 50, sdm), add = TRUE, col = "red")
# Empirical mean and standard deviation of the N sample means (recorded
# output of one run; values vary between runs unless a seed is fixed).
mean(mitjanes)
## [1] 50.00092
sd(mitjanes)
## [1] 0.1605208
qqnorm(mitjanes)
qqline(mitjanes)
# Sample variance of every row. For normal data (n - 1) * S^2 / sigma^2 is
# chi-square with n - 1 degrees of freedom, so the density of S^2 at x is
# dchisq(x * (n - 1) / sigma^2, n - 1) * (n - 1) / sigma^2, with sigma = 5.
vars <- apply(mostra, 1, var)
hist(vars, breaks = "scott", freq = FALSE)
curve(dchisq(x / 5^2 * (n - 1), n - 1) / 25 * (n - 1), add = TRUE, col = "red")
# Q-Q plot of the variances against chi-square(n - 1) quantiles.
qqplot(qchisq(ppoints(500), n - 1), vars)
# Repetiu l’exercici anterior amb una distribució no normal.
# Theoretical moments of the Uniform(0, 50) distribution sampled below:
# mean = 50 / 2, variance = 50^2 / 12, standard deviation = sqrt(variance).
mitjd <- 50 / 2
mitjd
## [1] 25
vard <- 50^2 / 12
vard
## [1] 208.3333
sdd <- sqrt(vard)
sdd
## [1] 14.43376

# Small sample: 100 uniform draws on [0, 50] against the theoretical density.
n <- 100
mostra <- runif(n, min = 0, max = 50)
hist(mostra, breaks = "scott", freq = FALSE)
curve(dunif(x, min = 0, max = 50), col = "red", add = TRUE)
# Sample moments (recorded output of one run).
mean(mostra)
## [1] 25.38484
sd(mostra)
## [1] 15.10414

# Large sample: one million uniform draws on the same interval.
n <- 1e6
mostra <- runif(n, min = 0, max = 50)
hist(mostra, breaks = "scott", freq = FALSE)
curve(dunif(x, min = 0, max = 50), col = "red", add = TRUE)
mean(mostra)
## [1] 24.9861
sd(mostra)
## [1] 14.44057
# Sampling distribution of the mean and of the variance for Uniform(0, 50)
# samples of size n = 10. `mostra` holds N independent samples, one per row.
# Relies on `mitjd` and `sdd` (theoretical mean and standard deviation of the
# sampled distribution) having been defined earlier in the script.
N <- 1e4
n <- 10
mostra <- matrix(runif(N * n, 0, 50), ncol = n)
# rowMeans() is the vectorised equivalent of apply(mostra, 1, mean) and is
# considerably faster on large matrices.
mitjanes <- rowMeans(mostra)
# Theoretical standard deviation of the sample mean: sdd / sqrt(n).
(sdm <- sdd / sqrt(n))
## [1] 4.564355
hist(mitjanes, breaks = "scott", freq = FALSE)
# Normal density with the theoretical mean and standard error, for comparison.
curve(dnorm(x, mitjd, sdm), add = TRUE, col = "red")
# Empirical moments of the N sample means (recorded output of one run).
mean(mitjanes)
## [1] 25.02209
sd(mitjanes)
## [1] 4.568931
qqnorm(mitjanes)
qqline(mitjanes)
# Sample variance of every row. The red curve is the scaled chi-square(n - 1)
# density that would be exact for normal data with variance sdd^2; here it
# serves only as a reference against the non-normal case.
vars <- apply(mostra, 1, var)
hist(vars, breaks = "scott", freq = FALSE)
curve(dchisq(x / sdd^2 * (n - 1), n - 1) / sdd^2 * (n - 1), add = TRUE, col = "red")
# Q-Q plot of the variances against chi-square(n - 1) quantiles.
qqplot(qchisq(ppoints(500), n - 1), vars)
# Sampling distribution of the mean and of the variance for Uniform(0, 50)
# samples of size n = 1000. `mostra` holds N independent samples, one per row.
# Relies on `mitjd` and `sdd` (theoretical mean and standard deviation of the
# sampled distribution) having been defined earlier in the script.
N <- 1e4
n <- 1000
mostra <- matrix(runif(N * n, 0, 50), ncol = n)
# rowMeans() is the vectorised equivalent of apply(mostra, 1, mean); on this
# 1e4 x 1000 matrix it avoids calling mean() once per row from R.
mitjanes <- rowMeans(mostra)
# Theoretical standard deviation of the sample mean: sdd / sqrt(n).
(sdm <- sdd / sqrt(n))
## [1] 0.4564355
hist(mitjanes, breaks = "scott", freq = FALSE)
# Normal density with the theoretical mean and standard error, for comparison.
curve(dnorm(x, mitjd, sdm), add = TRUE, col = "red")
# Empirical moments of the N sample means (recorded output of one run).
mean(mitjanes)
## [1] 25.00218
sd(mitjanes)
## [1] 0.4536775
qqnorm(mitjanes)
qqline(mitjanes)
# Sample variance of every row. The red curve is the scaled chi-square(n - 1)
# density that would be exact for normal data with variance sdd^2; here it
# serves only as a reference against the non-normal case.
vars <- apply(mostra, 1, var)
hist(vars, breaks = "scott", freq = FALSE)
curve(dchisq(x / sdd^2 * (n - 1), n - 1) / sdd^2 * (n - 1), add = TRUE, col = "red")
# Q-Q plot of the variances against chi-square(n - 1) quantiles.
qqplot(qchisq(ppoints(500), n - 1), vars)
# Poisson(lambda) example. For a Poisson distribution the theoretical mean and
# variance both equal lambda.
lambda <- .1
(mitjd <- lambda)
## [1] 0.1
(vard <- lambda)
## [1] 0.1
(sdd <- sqrt(vard))
## [1] 0.3162278

# Small sample: 100 Poisson draws, shown as a frequency histogram.
n <- 100
mostra <- rpois(n, lambda)
hist(mostra, breaks = "scott")
# Expected counts n * P(X = k) for k = 0..10, overlaid on the count scale.
# `lwd` (line width) is the valid graphical parameter; the former `lwt` was a
# typo that R ignored with a warning, so the requested width was never applied.
points(0:10, dpois(0:10, lambda) * n, col = "red", lwd = 3)
# Sample moments (recorded output of one run).
mean(mostra)
## [1] 0.05
sd(mostra)
## [1] 0.2190429

# Large sample: one million Poisson draws with the same overlay.
n <- 1e6
mostra <- rpois(n, lambda)
hist(mostra, breaks = "scott")
points(0:10, dpois(0:10, lambda) * n, col = "red", lwd = 3)
mean(mostra)
## [1] 0.099861
sd(mostra)
## [1] 0.3160995
# Sampling distribution of the mean and of the variance for Poisson(lambda)
# samples of size n = 10. `mostra` holds N independent samples, one per row.
# Relies on `lambda`, `mitjd` and `sdd` having been defined earlier in the
# script.
N <- 1e4
n <- 10
mostra <- matrix(rpois(N * n, lambda), ncol = n)
# rowMeans() is the vectorised equivalent of apply(mostra, 1, mean) and is
# considerably faster on large matrices.
mitjanes <- rowMeans(mostra)
# Theoretical standard deviation of the sample mean: sdd / sqrt(n).
(sdm <- sdd / sqrt(n))
## [1] 0.1
hist(mitjanes, breaks = "scott", freq = FALSE)
# Normal density with the theoretical mean and standard error, for comparison.
curve(dnorm(x, mitjd, sdm), add = TRUE, col = "red")
# Empirical moments of the N sample means (recorded output of one run).
mean(mitjanes)
## [1] 0.10227
sd(mitjanes)
## [1] 0.1010389
qqnorm(mitjanes)
qqline(mitjanes)
# Sample variance of every row. The red curve is the scaled chi-square(n - 1)
# density that would be exact for normal data with variance sdd^2; here it
# serves only as a reference against the non-normal case.
vars <- apply(mostra, 1, var)
hist(vars, breaks = "scott", freq = FALSE)
curve(dchisq(x / sdd^2 * (n - 1), n - 1) / sdd^2 * (n - 1), add = TRUE, col = "red")
# Q-Q plot of the variances against chi-square(n - 1) quantiles.
qqplot(qchisq(ppoints(500), n - 1), vars)
# Sampling distribution of the mean and of the variance for Poisson(lambda)
# samples of size n = 1000. `mostra` holds N independent samples, one per row.
# Relies on `lambda`, `mitjd` and `sdd` having been defined earlier in the
# script.
N <- 1e4
n <- 1000
mostra <- matrix(rpois(N * n, lambda), ncol = n)
# rowMeans() is the vectorised equivalent of apply(mostra, 1, mean); on this
# 1e4 x 1000 matrix it avoids calling mean() once per row from R.
mitjanes <- rowMeans(mostra)
# Theoretical standard deviation of the sample mean: sdd / sqrt(n).
(sdm <- sdd / sqrt(n))
## [1] 0.01
hist(mitjanes, breaks = "scott", freq = FALSE)
# Normal density with the theoretical mean and standard error, for comparison.
curve(dnorm(x, mitjd, sdm), add = TRUE, col = "red")
# Empirical moments of the N sample means (recorded output of one run).
mean(mitjanes)
## [1] 0.1000053
sd(mitjanes)
## [1] 0.01004761
qqnorm(mitjanes)
qqline(mitjanes)
# Sample variance of every row. The red curve is the scaled chi-square(n - 1)
# density that would be exact for normal data with variance sdd^2; here it
# serves only as a reference against the non-normal case.
vars <- apply(mostra, 1, var)
hist(vars, breaks = "scott", freq = FALSE)
curve(dchisq(x / sdd^2 * (n - 1), n - 1) / sdd^2 * (n - 1), add = TRUE, col = "red")
# Q-Q plot of the variances against chi-square(n - 1) quantiles.
qqplot(qchisq(ppoints(500), n - 1), vars)
# Confidence intervals for the mean of mtcars$hp, built by hand from Student t
# quantiles with n - 1 degrees of freedom and then cross-checked with t.test().
n <- length(mtcars$hp)
hp_mean <- mean(mtcars$hp)
hp_sd <- sd(mtcars$hp)

# 95% interval: mean + t quantile * sd / sqrt(n).
hp_mean + qt(c(0.025, 0.975), df = n - 1) * hp_sd / sqrt(n)
## [1] 121.9679 171.4071
# The same interval as reported by t.test() (its output shows a 95% level).
t.test(mtcars$hp)
##
## One Sample t-test
##
## data: mtcars$hp
## t = 12.103, df = 31, p-value = 2.794e-13
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 121.9679 171.4071
## sample estimates:
## mean of x
## 146.6875

# 90% interval, by hand and from t.test().
hp_mean + qt(c(0.05, 0.95), df = n - 1) * hp_sd / sqrt(n)
## [1] 126.1373 167.2377
t.test(mtcars$hp, conf.level = 0.9)$conf.int
## [1] 126.1373 167.2377
## attr(,"conf.level")
## [1] 0.9

# 99% interval, by hand and from t.test().
hp_mean + qt(c(0.005, 0.995), df = n - 1) * hp_sd / sqrt(n)
## [1] 113.4288 179.9462
t.test(mtcars$hp, conf.level = 0.99)$conf.int
## [1] 113.4288 179.9462
## attr(,"conf.level")
## [1] 0.99
# Frequency table of mtcars$am, followed by a 95% confidence interval for the
# proportion of cars with am == 0.
table(mtcars$am)
##
## 0 1
## 19 13
n <- length(mtcars$am)
n
## [1] 32
# Observed proportion of rows with am == 0.
p0 <- sum(mtcars$am == 0) / n
p0
## [1] 0.59375
# Normal-approximation interval: p0 +/- z * sqrt(p0 * (1 - p0) / n).
se_p0 <- sqrt(p0 * (1 - p0) / n)
p0 + qnorm(c(0.025, 0.975)) * se_p0
## [1] 0.4235845 0.7639155
# Interval reported by prop.test(); its output mentions a continuity
# correction, and its limits differ slightly from the hand-computed ones.
prop.test(table(mtcars$am))
##
## 1-sample proportions test with continuity correction
##
## data: table(mtcars$am), null probability 0.5
## X-squared = 0.78125, df = 1, p-value = 0.3768
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.4078543 0.7578086
## sample estimates:
## p
## 0.59375
# Empirical coverage of the normal-approximation 95% interval for a
# proportion. Case 1: N binomial experiments with n = 500 trials and p = 0.4.
N <- 1e6
n <- 500
p <- .4
mostra <- rbinom(N, size = n, prob = p)
p0 <- mostra / n
# qnorm(.025) is negative, so adding `delta` gives the lower limit and
# subtracting it gives the upper limit.
delta <- qnorm(.025) * sqrt(p0 * (1 - p0) / n)
int1 <- p0 + delta
int2 <- p0 - delta
# TRUE where the true p lies strictly inside the interval.
dins <- (p > int1 & p < int2)
table(dins)/N
## dins
## FALSE TRUE
## 0.049745 0.950255

# Case 2: n = 50 trials and p = 0.1. The sample is stored in `mostra1` so that
# `mostra` from case 1 stays available.
N <- 1e6
n <- 50
p <- .1
mostra1 <- rbinom(N, size = n, prob = p)
p0 <- mostra1 / n
delta <- qnorm(.025) * sqrt(p0 * (1 - p0) / n)
int1 <- p0 + delta
int2 <- p0 - delta
dins <- (p > int1 & p < int2)
table(dins)/N
## dins
## FALSE TRUE
## 0.120987 0.879013
# The difference between the two cases lies in how far each one is from
# normality, bearing in mind that the confidence-interval formula assumes
# normality.
# Q-Q plots use only the first 10000 observations of each sample.
idx <- seq_len(10000)

# Second simulation (stored in `mostra1`).
hist(mostra1)
qqnorm(mostra1[idx])
qqline(mostra1[idx])

# First simulation (stored in `mostra`).
hist(mostra)
qqnorm(mostra[idx])
qqline(mostra[idx])