1
# Odd numbers from 1 up to (at most) 100, in steps of 2.
v <- seq(from = 1, to = 100, by = 2)
v
## [1] 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45
## [24] 47 49 51 53 55 57 59 61 63 65 67 69 71 73 75 77 79 81 83 85 87 89 91
## [47] 93 95 97 99
2
# Keep the elements of v strictly between 60 and 80.
v[60 < v & v < 80]
## [1] 61 63 65 67 69 71 73 75 77 79
3
# Sample variance of the elements of v strictly between 60 and 80.
var(v[60 < v & v < 80])
## [1] 36.66667
4
# Six (R, H) pairs; V below is R^2 * pi * H / 3, i.e. the volume formula of a
# cone with radius R and height H.
dt <- data.frame(
  R = c(2.27, 1.98, 1.69, 1.88, 1.64, 2.14),
  H = c(8.28, 8.04, 9.06, 8.70, 7.58, 8.34)
)
# Print dt with the computed column V appended (dt itself is not modified).
transform(dt, V = R^2 * pi * H / 3)
## R H V
## 1 2.27 8.28 44.67974
## 2 1.98 8.04 33.00768
## 3 1.69 9.06 27.09756
## 4 1.88 8.70 32.20057
## 5 1.64 7.58 21.34939
## 6 2.14 8.34 39.99652
5
# Four (x, y) observations used to fit the line y = b0 + b1 * x.
dt1 <- data.frame(
  x = c(3.15, -.55, -.35, .16),
  y = c(2.93, -.35, -.25, -.12)
)
# Design matrix: a column of ones (intercept) followed by the predictor x.
X <- matrix(c(rep(1, nrow(dt1)), dt1$x), ncol = 2)
# Least-squares coefficients from the normal equations (X'X) b = X'y.
# The system is solved directly instead of forming the explicit inverse of
# X'X and multiplying by it: same result, fewer operations, and less
# floating-point error. Row 1 is the intercept, row 2 the slope.
solve(crossprod(X), crossprod(X, dt1$y))
## [,1]
## [1,] 0.001483229
## [2,] 0.914550657
# Same regression of y on x, fitted with lm() for comparison.
lm(formula = y ~ x, data = dt1)
##
## Call:
## lm(formula = y ~ x, data = dt1)
##
## Coefficients:
## (Intercept) x
## 0.001483 0.914551
# Same regression again via lsfit(); prints coefficients, residuals and the
# QR decomposition used for the fit.
with(dt1, lsfit(x, y))
## $coefficients
## Intercept X
## 0.001483229 0.914550657
##
## $residuals
## [1] 0.0476822 0.1515196 0.0686095 -0.2678113
##
## $intercept
## [1] TRUE
##
## $qr
## $qt
## [1] -1.105000000 2.731595328 0.003732199 -0.318822892
##
## $qr
## Intercept X
## [1,] -2.0 -1.2050000
## [2,] 0.5 2.9868169
## [3,] 0.5 0.6032063
## [4,] 0.5 0.4324559
##
## $qraux
## [1] 1.500000 1.670167
##
## $rank
## [1] 2
##
## $pivot
## [1] 1 2
##
## $tol
## [1] 1e-07
##
## attr(,"class")
## [1] "qr"
6, 7, 8
library(MASS)
# Number of rows of `cats` per level of Sex.
table(cats[["Sex"]])
##
## F M
## 47 97
library(dplyr)
# Per-Sex summary of the Bwt column: mean, standard deviation and row count.
cats %>%
  group_by(Sex) %>%
  summarise(
    avg = mean(Bwt),
    SD = sd(Bwt),
    count = n()
  )
## # A tibble: 2 x 4
## Sex avg SD count
## <fctr> <dbl> <dbl> <int>
## 1 F 2.359574 0.2739879 47
## 2 M 2.900000 0.4674844 97
9
# Count rows by Sex and by whether Bwt exceeds 2.5. After summarise() the
# result is still grouped by Sex, so `prop` is each count's share within its
# own Sex, rounded to two decimals.
cats %>%
  group_by(Sex, Bwt > 2.5) %>%
  summarise(count = n()) %>%
  mutate(prop = round(count / sum(count), digits = 2))
## # A tibble: 4 x 4
## # Groups: Sex [2]
## Sex `Bwt > 2.5` count prop
## <fctr> <lgl> <int> <dbl>
## 1 F FALSE 36 0.77
## 2 F TRUE 11 0.23
## 3 M FALSE 25 0.26
## 4 M TRUE 72 0.74
10
# Each object is a one-element list holding the Sex values of one group.
# NOTE(review): only the Sex labels are stored (a factor of repeated "F" or
# "M"), not the measurements for those rows -- confirm this is what the
# exercise asks for.
catsF <- with(cats, list(Sex[Sex == "F"]))
catsM <- with(cats, list(Sex[Sex == "M"]))
Central Limit Theorem
# Central Limit Theorem demonstration.
# 1. Draw 1000 values from each of three distributions.
# 2. For each sample size in `samp`, take 1000 samples (without replacement)
#    from those draws and record each sample's mean.
# 3. Plot a density-scaled histogram plus a density curve of the means, one
#    panel per (distribution, sample size) combination.
set.seed(10)
samp <- c(small = 3, medium = 10, large = 100)
functions <- c(normal = "rnorm", exponential = "rexp", binomial = "rbinom")
params <- list(
  list(mean = 1, sd = 1),
  list(rate = 1),
  list(size = 1, prob = .1)
)

# Means of `reps` samples of `size` values taken from `draws`.
sample_means <- function(draws, size, reps = 1000) {
  replicate(reps, mean(sample(draws, size)))
}

# One list element per distribution, each holding 1000 random draws. Each
# generator is called as f(1000, <its params>), in the order of `functions`.
dt <- tibble::tibble(
  distribution = Map(
    function(f, p) do.call(f, c(list(1000), p)),
    functions, params
  )
)

dt %>%
  # Sample sizes come from `samp`, so they are defined in exactly one place.
  dplyr::mutate(
    small = lapply(distribution, sample_means, size = samp[["small"]]),
    medium = lapply(distribution, sample_means, size = samp[["medium"]]),
    large = lapply(distribution, sample_means, size = samp[["large"]])
  ) %>%
  # Replace the raw draws with the distribution label, then reshape to one
  # row per individual sample mean.
  dplyr::mutate(distribution = names(functions)) %>%
  tidyr::gather(sample, values, -distribution) %>%
  tidyr::unnest(values) %>%
  ggplot2::ggplot(ggplot2::aes(x = values)) +
  # alpha and colour are fixed constants, so they are set outside aes();
  # inside aes() they would be treated as data and the hex colour would not
  # be used as the actual line colour.
  ggplot2::geom_histogram(ggplot2::aes(y = ..density..), alpha = .7) +
  ggplot2::geom_density(color = "#D55E00", alpha = .2) +
  ggplot2::facet_wrap(distribution ~ sample, scales = "free") +
  ggplot2::theme(legend.position = "none") +
  ggplot2::ggtitle(
    "Central Limit Theorem",
    subtitle = "Means distributions are bell shaped when sample >30"
  )
