3.1

# Exercise 3-1 sample, stored in ascending order so that element k is the
# k-th order statistic (needed for the rank-based interval below).
ex_3_1 = sort(c(6.0, 0.5, 0.4, 0.7, 0.8, 6.0,
                5.0, 0.6, 1.2, 0.3, 0.2, 0.5,
                0.5,  10, 0.2, 0.2, 1.7, 3.0))
ex_3_1

##  [1]  0.2  0.2  0.2  0.3  0.4  0.5  0.5  0.5  0.6  0.7  0.8  1.2  1.7  3.0
## [15]  5.0  6.0  6.0 10.0

n = length(ex_3_1)
p = 0.5

# nonparametric
# Large-sample approximation to the lower/upper ranks bounding the median.
non_p_Rl = (n - 1.96 * sqrt(n))/2
non_p_Rl

## [1] 4.842212

non_p_Ru = (n + 1.96 * sqrt(n))/2 +1
non_p_Ru

## [1] 14.15779

# The interval limits are the observations at those ranks, rounded to the
# nearest integer. Indexing the sorted data avoids the interpolation that
# quantile(probs = rank / n) would introduce.
non_p_Rl = ex_3_1[round(non_p_Rl)]
non_p_Ru = ex_3_1[round(non_p_Ru)]

# parametric
# Interval on the log scale, back-transformed with exp(). The t critical value
# is taken for n - 1 degrees of freedom rather than a fixed table entry.
t_crit = qt(0.975, df = n - 1)
log_se = sqrt(var(log(ex_3_1)) / n)
p_Rl = exp(mean(log(ex_3_1)) - t_crit * log_se)
p_Ru = exp(mean(log(ex_3_1)) + t_crit * log_se)

library("ggplot2")
# Box plot of the 3-1 sample with both sets of interval limits overlaid as
# horizontal lines: blue = nonparametric limits, red = parametric limits.
ggplot() +
  geom_boxplot(aes(x = "ex_3_1", y = ex_3_1)) +
  geom_hline(yintercept = c(non_p_Rl, non_p_Ru), colour = "blue", size = 2) +
  geom_hline(yintercept = c(p_Rl, p_Ru), colour = "red", size = 2)

Parametric confidence interval seems more appropriate in this case.

3-2

# Exercise 3-2 sample, stored in ascending order.
ex_3_2 = sort(c(1.0, 0.2, 1.2, 1.0, 0.3, 0.1,
                0.1, 0.4, 3.2, 0.3, 0.4, 1.8,
                0.9, 0.1, 0.2, 0.3, 0.5))

n = length(ex_3_2)

# Symmetric t-based 95% interval for the mean. The critical value is computed
# for n - 1 degrees of freedom; the fixed 2.064 used before is the 24-df value.
t_crit = qt(0.975, df = n - 1)
se_mean = sqrt(var(ex_3_2) / n)
CI_mean_l = mean(ex_3_2) - t_crit * se_mean
CI_mean_u = mean(ex_3_2) + t_crit * se_mean
CI_mean_l

## [1] 0.295175   (rounded; regenerated for the corrected critical value)

CI_mean_u

## [1] 1.11659    (rounded; regenerated for the corrected critical value)

# Arithmetic mean of the sample.
mean(ex_3_2)

## [1] 0.7058824

# Geometric mean: mean on the log scale, back-transformed.
exp(mean(log(ex_3_2)))

## [1] 0.4277575

# Box plot of the 3-2 sample with two location estimates overlaid:
# blue = arithmetic mean, red = back-transformed mean of the logs.
ggplot() +
  geom_boxplot(aes(x = "ex_3_2", y = ex_3_2)) +
  geom_hline(yintercept = mean(ex_3_2), colour = "blue", size = 2) +
  geom_hline(yintercept = exp(mean(log(ex_3_2))), colour = "red", size = 2)

The lognormal mean seems more appropriate in this case, because the given data show a skewed distribution.

3-3

# Exercise 3-3 sample.
ex_3_3 = c(0.001, 0.030, 0.10, 0.003, 0.040, 0.454,
           0.007, 0.041, 0.49, 0.020, 0.077, 1.02)
boxplot(ex_3_3)

# Keep the data in ascending order so that element k is the k-th order
# statistic (needed for the rank-based interval below).
ex_3_3 = sort(ex_3_3)
ex_3_3

##  [1] 0.001 0.003 0.007 0.020 0.030 0.040 0.041 0.077 0.100 0.454 0.490
## [12] 1.020

n = length(ex_3_3)
n

## [1] 12

# nonparametric
# Large-sample approximation to the lower/upper ranks bounding the median.
# NOTE(review): with only 12 observations an exact binomial table may be
# preferable to this normal approximation -- confirm against the course text.
non_p_Rl = (n - 1.96 * sqrt(n))/2
non_p_Rl

## [1] 2.60518

non_p_Ru = (n + 1.96 * sqrt(n))/2 +1
non_p_Ru

## [1] 10.39482

# The interval limits are the observations at those ranks, rounded to the
# nearest integer. Indexing the sorted data avoids the interpolation that
# quantile(probs = rank / n) would introduce.
non_p_Rl = ex_3_3[round(non_p_Rl)]
non_p_Ru = ex_3_3[round(non_p_Ru)]

# parametric
# Interval on the log scale, back-transformed with exp(). The t critical value
# is taken for n - 1 degrees of freedom rather than a fixed table entry.
t_crit = qt(0.975, df = n - 1)
log_se = sqrt(var(log(ex_3_3)) / n)
p_Rl = exp(mean(log(ex_3_3)) - t_crit * log_se)
p_Ru = exp(mean(log(ex_3_3)) + t_crit * log_se)

library("ggplot2")
# Box plot of the 3-3 sample with both sets of interval limits overlaid
# (blue = nonparametric, red = parametric) and the value 0.85 marked as a
# green point for comparison against the intervals.
ggplot() +
  geom_boxplot(aes(x = "ex_3_3", y = ex_3_3)) +
  geom_hline(yintercept = c(non_p_Rl, non_p_Ru), colour = "blue", size = 2) +
  geom_hline(yintercept = c(p_Rl, p_Ru), colour = "red", size = 2) +
  geom_point(aes(x = "ex_3_3", y = 0.85), colour = "green", size = 5)

Yes, it does.
The parametric confidence interval seems more appropriate than the nonparametric confidence interval, because the larger values are located outside the interquartile range.

3-4

# Read the annual flow table (columns: year, flow in cfs).
# The garbled first column name in the earlier output is the signature of an
# undeclared UTF-8 byte-order mark, so the encoding is stated explicitly.
ex_3_4 = read.csv("c2.csv", fileEncoding = "UTF-8-BOM")
ex_3_4

##       Year Flow..cfs.
## 1     1941        369
## 2     1942        683
## 3     1943        923
## 4     1944       1193
## 5     1945        413
## 6     1946       1025
## 7     1947        894
## 8     1948        859
## 9     1949       1157
## 10    1950        524
## 11    195l        327
## 12    1952        574
## 13    1953        762
## 14    1954        578
## 15    1955        379
## 16    1956        374
## 17    1957        581
## 18    1958        581
## 19    1959        530
## 20    1960        929

# NOTE(review): row 11 shows the year as "195l" (letter l instead of digit 1);
# this looks like a typo in c2.csv. The year column is not used below.

n = nrow(ex_3_4)
n

## [1] 20

boxplot(ex_3_4[, 2])

# Keep only the flow column from here on.
ex_3_4 = ex_3_4[, 2]


# parametric (t-based) - mean
# The t critical value is computed for n - 1 degrees of freedom; the fixed
# 2.064 used before is the 24-df value.
t_crit = qt(0.975, df = n - 1)
se_mean = sqrt(var(ex_3_4) / n)
mean_Rl = mean(ex_3_4) - t_crit * se_mean
mean_Ru = mean(ex_3_4) + t_crit * se_mean

# nonparametric - median
# Large-sample approximation to the lower/upper ranks bounding the median.
non_p_Rl = (n - 1.96 * sqrt(n))/2
non_p_Rl

## [1] 5.617307

non_p_Ru = (n + 1.96 * sqrt(n))/2 +1
non_p_Ru

## [1] 15.38269

# The interval limits are the observations at those ranks, rounded to the
# nearest integer. The flows are in file order, so sort a copy before indexing.
flows_sorted = sort(ex_3_4)
non_p_Rl = flows_sorted[round(non_p_Rl)]
non_p_Ru = flows_sorted[round(non_p_Ru)]

# Box plot of the annual flows with the two intervals overlaid:
# blue = limits for the mean, red = limits for the median.
ggplot() +
  geom_boxplot(aes(x = "ex_3_4", y = ex_3_4)) +
  # mean
  geom_hline(yintercept = c(mean_Rl, mean_Ru), colour = "blue", size = 2) +
  # median
  geom_hline(yintercept = c(non_p_Rl, non_p_Ru), colour = "red", size = 2)

3-5

# Read every cell of c3.csv (no header row) and flatten the table into a
# single numeric vector.
ex_3_5 = as.numeric(unlist(read.csv("c3.csv", header = FALSE)))
# sort() drops missing values by default, leaving the data in ascending order.
ex_3_5 = sort(ex_3_5)
# NOTE(review): the printed result starts with the integers 1..37 and then
# jumps to 1730, which looks like a row-index column swept in by unlist() --
# confirm against c3.csv before relying on these values.
ex_3_5

##   [1]      1      2      3      4      5      6      7      8      9     10
##  [11]     11     12     13     14     15     16     17     18     19     20
##  [21]     21     22     23     24     25     26     27     28     29     30
##  [31]     31     32     33     34     35     36     37   1730   1750   1770
##  [41]   1810   1940   1940   1960   1960   1980   2010   2010   2030   2050
##  [51]   2050   2050   2080   2080   2100   2170   2190   2190   2260   2290
##  [61]   2290   2290   2290   2320   2360   2390   2390   2390   2430   2460
##  [71]   2490   2490   2490   2600   2620   2680   2680   2700   2700   2700
##  [81]   2700   2760   2790   2810   2840   2840   2840   2840   2840   2840
##  [91]   2870   2950   3040   3040   3100   3130   3220   3220   3220   3310
## [101]   3370   3410   3440   3440   3470   3500   3570   3600   3630   3630
## [111]   3660   3660   3700   3700   3700   3760   3760   3760   3900   3900
## [121]   4000   4000   4040   4040   4140   4210   4320   4460   4500   4540
## [131]   4570   4610   4610   4680   4840   4960   5000   5120   5200   5280
## [141]   5360   5500   5690   5900   5900   5900   5900   5900   5900   6120
## [151]   6120   6120   6120   6120   6340   6340   6800   6800   6800   6800
## [161]   6800   7040   7040   7280   7280   7280   7280   7280   7280   7530
## [171]   7530   7530   7780   7780   7780   7780   7780   8040   8040   8040
## [181]   8040   8040   8040   8040   8040   8300   8300   8300   8580   8580
## [191]   8580   8580   8580   8850   8850   8850   8850   8850   9120   9120
## [201]   9120   9120   9400   9400   9400   9400   9400   9400   9700   9700
## [211]   9700   9700   9700   9700   9700   9700   9700  10000  10000  10000
## [221]  10000  10000  10300  10300  10300  10300  10300  10300  10300  10600
## [231]  10600  10600  10600  10600  10600  10900  10900  10900  10900  11200
## [241]  11200  11500  11500  11500  11500  11500  11500  11800  11800  11800
## [251]  12200  12200  12200  12500  12500  12800  12800  12800  13200  13200
## [261]  13600  13900  13900  13900  13900  13900  14300  14300  14300  14300
## [271]  14600  14600  15000  15000  15000  15000  15400  15400  15400  15400
## [281]  15800  15800  16200  16200  16200  16200  16700  17100  17100  17500
## [291]  17500  17500  18000  18000  18000  18400  18800  18800  19300  19300
## [301]  19800  19800  19800  20200  20200  20700  20700  22100  22100  22100
## [311]  22100  22100  22100  23100  23100  23600  23600  24100  24100  24500
## [321]  25100  25600  25600  26200  26200  27200  27800  27800  27800  27800
## [331]  27800  27800  27900  28900  31100  31100  31100  32300  35900  35900
## [341]  37100  37100  38300  38300  39500  39500  40700  42000  47200  49800
## [351]  49800  50400  55000  60600  64800  73300  73300  77800  77800  80800
## [361]  86800  91300 168000 250000 309999

# Interval estimate for the 90th percentile of ex_3_5 under a log-normal model:
# exp(mean(log x) + K * sd(log x)), where K is a noncentral-t quantile divided
# by sqrt(n), evaluated for the actual sample size.
# NOTE(review): the fixed factors 0.898 and 1.838 used here before appear to be
# the tabulated values for n = 25 at a 90% two-sided level; that level is kept
# below -- confirm it is the one the exercise asks for.
n = length(ex_3_5)
conf_level = 0.90
tail_p = (1 - conf_level) / 2
K = qt(c(tail_p, 1 - tail_p), df = n - 1,
       ncp = qnorm(0.90) * sqrt(n)) / sqrt(n)

Rl = exp(mean(log(ex_3_5)) + K[1] * sd(log(ex_3_5)))
Rl

## (stale: was 32322.55 with the n = 25 factor; re-run to refresh)

Ru = exp(mean(log(ex_3_5)) + K[2] * sd(log(ex_3_5)))
Ru

## (stale: was 246013.8 with the n = 25 factor; re-run to refresh)

# Box plot of the 3-5 data with the lower and upper percentile-interval limits
# drawn as blue horizontal lines.
ggplot() +
  geom_boxplot(aes(x = "ex_3_5", y = ex_3_5)) +
  geom_hline(yintercept = c(Rl, Ru), colour = "blue", size = 2)

SMWR_Ch3

박선희

2018년 4월 18일

3.1

3-2

3-3

3-4

3-5