1

# Odd numbers from 1 to 99 (a step of 2 starting at 1 never lands on 100).
v <- seq(from = 1, to = 99, by = 2)
print(v)

##  [1]  1  3  5  7  9 11 13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45
## [24] 47 49 51 53 55 57 59 61 63 65 67 69 71 73 75 77 79 81 83 85 87 89 91
## [47] 93 95 97 99

2

# Keep the elements of v strictly between 60 and 80.
v[60 < v & v < 80]

##  [1] 61 63 65 67 69 71 73 75 77 79

3

# Sample variance of the elements of v strictly between 60 and 80.
var(v[60 < v & v < 80])

## [1] 36.66667

4

# Six paired measurements: R and H.
dt <- data.frame(
  R = c(2.27, 1.98, 1.69, 1.88, 1.64, 2.14),
  H = c(8.28, 8.04, 9.06, 8.70, 7.58, 8.34)
)
# Show the table with an extra column V = R^2 * pi * H / 3, which matches the
# cone-volume formula with R as radius and H as height. dt itself is unchanged.
transform(dt, V = R^2 * pi * H / 3)

##      R    H        V
## 1 2.27 8.28 44.67974
## 2 1.98 8.04 33.00768
## 3 1.69 9.06 27.09756
## 4 1.88 8.70 32.20057
## 5 1.64 7.58 21.34939
## 6 2.14 8.34 39.99652

5

# Ordinary least squares fit of y = b0 + b1 * x, computed by hand from the
# normal equations so it can be compared with lm() and lsfit().
dt1 <- data.frame(x = c(3.15, -.55, -.35, .16),
                  y = c(2.93, -.35, -.25, -.12))

# Design matrix: a column of ones (intercept) next to the predictor.
X <- cbind(1, dt1$x)

# Solve (X'X) b = X'y as a linear system instead of forming solve(X'X) and
# multiplying: same estimate, but it avoids the explicit matrix inverse, which
# is slower and numerically less stable.
beta_hat <- solve(crossprod(X), crossprod(X, dt1$y))
beta_hat

##             [,1]
## [1,] 0.001483229
## [2,] 0.914550657

# Same regression of y on x, fitted with lm().
lm(formula = y ~ x, data = dt1)

## 
## Call:
## lm(formula = y ~ x, data = dt1)
## 
## Coefficients:
## (Intercept)            x  
##    0.001483     0.914551

# Same regression again, fitted with lsfit(), which adds the intercept itself.
with(dt1, lsfit(x = x, y = y))

## $coefficients
##   Intercept           X 
## 0.001483229 0.914550657 
## 
## $residuals
## [1]  0.0476822  0.1515196  0.0686095 -0.2678113
## 
## $intercept
## [1] TRUE
## 
## $qr
## $qt
## [1] -1.105000000  2.731595328  0.003732199 -0.318822892
## 
## $qr
##      Intercept          X
## [1,]      -2.0 -1.2050000
## [2,]       0.5  2.9868169
## [3,]       0.5  0.6032063
## [4,]       0.5  0.4324559
## 
## $qraux
## [1] 1.500000 1.670167
## 
## $rank
## [1] 2
## 
## $pivot
## [1] 1 2
## 
## $tol
## [1] 1e-07
## 
## attr(,"class")
## [1] "qr"

6, 7, 8

library(MASS)
# Number of cats of each sex in the cats data set.
table(cats[["Sex"]])

## 
##  F  M 
## 47 97

library(dplyr)
# Per-sex summary of body weight (Bwt): mean, standard deviation, group size.
cats %>%
  group_by(Sex) %>%
  summarise(
    avg = mean(Bwt),
    SD = sd(Bwt),
    count = n()
  )

## # A tibble: 2 x 4
##      Sex      avg        SD count
##   <fctr>    <dbl>     <dbl> <int>
## 1      F 2.359574 0.2739879    47
## 2      M 2.900000 0.4674844    97

9

# Count cats by sex and by whether Bwt exceeds 2.5, then turn each count into
# a share of its sex group. summarise() drops only the last grouping level, so
# the result is still grouped by Sex and sum(count) is the per-sex total.
cats %>%
  group_by(Sex, Bwt > 2.5) %>%
  summarise(count = n()) %>%
  mutate(prop = round(count / sum(count), 2))

## # A tibble: 4 x 4
## # Groups:   Sex [2]
##      Sex `Bwt > 2.5` count  prop
##   <fctr>       <lgl> <int> <dbl>
## 1      F       FALSE    36  0.77
## 2      F        TRUE    11  0.23
## 3      M       FALSE    25  0.26
## 4      M        TRUE    72  0.74

10

# Sex labels of the female and of the male cats, each wrapped in a
# one-element list.
# NOTE(review): only the Sex column is kept here; confirm whether whole rows
# (e.g. cats[cats$Sex == 'F', ]) or the Bwt values were intended.
catsF <- list(with(cats, Sex[Sex == 'F']))
catsM <- list(with(cats, Sex[Sex == 'M']))

Central Limit Theorem

# Central Limit Theorem demo: draw a population of 1000 values from each of
# three distributions, then plot the distribution of 1000 sample means for
# three sample sizes.
# Relies on dplyr (%>%, mutate, tibble) being attached earlier in the script;
# the remaining packages used below are attached here so the chunk does not
# depend on a setup step that is not visible in this file.
library(ggplot2)
library(purrr)
library(tidyr)

set.seed(10)

# Sample sizes used for the means, keyed by the facet label they appear under.
samp <- c(small = 3, medium = 10, large = 100)

# Random generators and their parameters, matched by position.
functions <- c(normal = 'rnorm', exponential = 'rexp', binomial = 'rbinom')
params <- list(
  list(mean = 1, sd = 1),
  list(rate = 1),
  list(size = 1, prob = .1)
)

# One population of 1000 draws per distribution, stored as a list column.
# do.call() replaces the deprecated purrr::invoke_map().
dt <- tibble(
  distribution = map2(functions, params, ~ do.call(.x, c(list(n = 1000), .y)))
)

# Means of 1000 samples of size `n`, drawn without replacement from
# `population`.
sample_means <- function(population, n) {
  replicate(1000, mean(sample(population, n)))
}

dt %>%
  mutate(
    small = map(distribution, sample_means, n = samp[["small"]]),
    medium = map(distribution, sample_means, n = samp[["medium"]]),
    large = map(distribution, sample_means, n = samp[["large"]])
  ) %>%
  # Replace the raw populations with the distribution labels for faceting.
  mutate(distribution = names(functions)) %>%
  gather(sample, values, -distribution) %>%
  unnest(values) %>%
  ggplot(aes(x = values)) +
  # Constant colour/alpha are set outside aes(): inside aes() they are treated
  # as data to map through a scale, so the requested values are not applied.
  geom_histogram(aes(y = ..density..), alpha = .7) +
  geom_density(color = '#D55E00', alpha = .2) +
  facet_wrap(distribution ~ sample, scales = 'free') +
  theme(legend.position = 'none') +
  ggtitle('Central Limit Theorem',
          subtitle = 'Means distributions are bell shaped when sample >30')

Homework 1

Dario Bonaretti

8/22/2017

1

2

3

4

5

6, 7, 8

9

10

Central Limit Theorem