Точечные оценки

# Simulate 100 draws from N(mean = 50, sd = 10), indexed 1..100.
x <- 1:100
# Pass the sample size explicitly: rnorm(x, ...) only works because a vector
# given as `n` is silently replaced by its length.
y <- rnorm(length(x), mean = 50, sd = 10)

# Scatter of the simulated values against their index.
plot(x, y)

# Histogram of the same simulated values.
hist(y)

оценка - средняя

# Draw a fresh sample of 100 values from N(mean = 50, sd = 10).
x <- 1:100
# Sample size is passed explicitly instead of relying on rnorm() taking the
# length of a vector `n`.
y <- rnorm(length(x), mean = 50, sd = 10)

# Point estimate of the mean.
mean(y)
## [1] 49.70675
# Point estimate of the standard deviation (built-in estimator).
sd(y)
## [1] 9.827458
# The same estimate computed by hand: square root of the sum of squared
# deviations from the mean, divided by n - 1.
our_sd <- sqrt(sum((y - mean(y))^2) / (length(y) - 1))
our_sd
## [1] 9.827458

Интервальные оценки

для нормального распределения

# Sampling distribution of the mean: 500 means, each taken over a sample of
# 100 draws from N(mean = 30, sd = 3).
# NOTE(review): the name `sample` shadows base::sample(); it is kept so that
# any other code relying on this variable keeps working.
sample <- replicate(500, mean(rnorm(100, mean = 30, sd = 3)))

# Histogram of one raw sample ...
p1 <- hist(rnorm(100, mean = 30, sd = 3))

# ... and of the 500 sample means.
p2 <- hist(sample)

# Overlay both histograms on the axes of the first one.
plot(p1, col = "red")
plot(p2, col = "blue", add = TRUE)

# One concrete sample, used for the single-sample estimates below.
sample_sample <- rnorm(100, mean = 30, sd = 3)
# Spread of the individual observations in that same sample (not in a new,
# unrelated draw), so that it is consistent with `se` below.
sd(sample_sample)
## [1] 3.299886
# Spread of the 500 sample means, i.e. the empirical standard error.
sd(sample)
## [1] 0.3009721
# Standard error estimated from the single sample: sd / sqrt(n).
se <- sd(sample_sample) / sqrt(length(sample_sample))
se
## [1] 0.3299886

для (не)нормального распределения

# A right-skewed sample: 10000 draws from rlnorm() with its default parameters.
log_sample <- rlnorm(10000)
hist(log_sample, breaks = 200)

# Means of 1000 independent log-normal samples of 10000 draws each.
log_mean_sample <- replicate(1000, mean(rlnorm(10000)))


# Histogram objects of the raw draws and of the sample means.
p1 <- hist(log_sample, breaks = 200)

p2 <- hist(log_mean_sample)

# Overlay the two histograms; the axes are restricted to x in [0, 4] and
# counts in [0, 100].
plot(p1, col = "red", xlim = c(0, 4), ylim = c(0, 100))
plot(p2, col = "blue", add = TRUE)

ggplot2

бобры

library(tidyverse)
## Warning: пакет 'tidyverse' был собран под R версии 4.2.3
## Warning: пакет 'ggplot2' был собран под R версии 4.2.3
## Warning: пакет 'tibble' был собран под R версии 4.2.3
## Warning: пакет 'readr' был собран под R версии 4.2.3
## Warning: пакет 'purrr' был собран под R версии 4.2.3
## Warning: пакет 'dplyr' был собран под R версии 4.2.3
## Warning: пакет 'lubridate' был собран под R версии 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)

# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html

# Print the whole beaver2 data frame (100 rows; columns day, time, temp, activ).
beaver2
##     day time  temp activ
## 1   307  930 36.58     0
## 2   307  940 36.73     0
## 3   307  950 36.93     0
## 4   307 1000 37.15     0
## 5   307 1010 37.23     0
## 6   307 1020 37.24     0
## 7   307 1030 37.24     0
## 8   307 1040 36.90     0
## 9   307 1050 36.95     0
## 10  307 1100 36.89     0
## 11  307 1110 36.95     0
## 12  307 1120 37.00     0
## 13  307 1130 36.90     0
## 14  307 1140 36.99     0
## 15  307 1150 36.99     0
## 16  307 1200 37.01     0
## 17  307 1210 37.04     0
## 18  307 1220 37.04     0
## 19  307 1230 37.14     0
## 20  307 1240 37.07     0
## 21  307 1250 36.98     0
## 22  307 1300 37.01     0
## 23  307 1310 36.97     0
## 24  307 1320 36.97     0
## 25  307 1330 37.12     0
## 26  307 1340 37.13     0
## 27  307 1350 37.14     0
## 28  307 1400 37.15     0
## 29  307 1410 37.17     0
## 30  307 1420 37.12     0
## 31  307 1430 37.12     0
## 32  307 1440 37.17     0
## 33  307 1450 37.28     0
## 34  307 1500 37.28     0
## 35  307 1510 37.44     0
## 36  307 1520 37.51     0
## 37  307 1530 37.64     0
## 38  307 1540 37.51     0
## 39  307 1550 37.98     1
## 40  307 1600 38.02     1
## 41  307 1610 38.00     1
## 42  307 1620 38.24     1
## 43  307 1630 38.10     1
## 44  307 1640 38.24     1
## 45  307 1650 38.11     1
## 46  307 1700 38.02     1
## 47  307 1710 38.11     1
## 48  307 1720 38.01     1
## 49  307 1730 37.91     1
## 50  307 1740 37.96     1
## 51  307 1750 38.03     1
## 52  307 1800 38.17     1
## 53  307 1810 38.19     1
## 54  307 1820 38.18     1
## 55  307 1830 38.15     1
## 56  307 1840 38.04     1
## 57  307 1850 37.96     1
## 58  307 1900 37.84     1
## 59  307 1910 37.83     1
## 60  307 1920 37.84     1
## 61  307 1930 37.74     1
## 62  307 1940 37.76     1
## 63  307 1950 37.76     1
## 64  307 2000 37.64     1
## 65  307 2010 37.63     1
## 66  307 2020 38.06     1
## 67  307 2030 38.19     1
## 68  307 2040 38.35     1
## 69  307 2050 38.25     1
## 70  307 2100 37.86     1
## 71  307 2110 37.95     1
## 72  307 2120 37.95     1
## 73  307 2130 37.76     1
## 74  307 2140 37.60     1
## 75  307 2150 37.89     1
## 76  307 2200 37.86     1
## 77  307 2210 37.71     1
## 78  307 2220 37.78     1
## 79  307 2230 37.82     1
## 80  307 2240 37.76     1
## 81  307 2250 37.81     1
## 82  307 2300 37.84     1
## 83  307 2310 38.01     1
## 84  307 2320 38.10     1
## 85  307 2330 38.15     1
## 86  307 2340 37.92     1
## 87  307 2350 37.64     1
## 88  308    0 37.70     1
## 89  308   10 37.46     1
## 90  308   20 37.41     1
## 91  308   30 37.46     1
## 92  308   40 37.56     1
## 93  308   50 37.55     1
## 94  308  100 37.75     1
## 95  308  110 37.76     1
## 96  308  120 37.73     1
## 97  308  130 37.77     1
## 98  308  140 38.01     1
## 99  308  150 38.04     1
## 100 308  200 38.07     1
# Per-column summary statistics (min, quartiles, mean, max) of beaver2.
summary(beaver2)
##       day             time           temp           activ     
##  Min.   :307.0   Min.   :   0   Min.   :36.58   Min.   :0.00  
##  1st Qu.:307.0   1st Qu.:1128   1st Qu.:37.15   1st Qu.:0.00  
##  Median :307.0   Median :1535   Median :37.73   Median :1.00  
##  Mean   :307.1   Mean   :1446   Mean   :37.60   Mean   :0.62  
##  3rd Qu.:307.0   3rd Qu.:1942   3rd Qu.:37.98   3rd Qu.:1.00  
##  Max.   :308.0   Max.   :2350   Max.   :38.35   Max.   :1.00
# `time` is a clock reading coded as HHMM that wraps to 0 after midnight
# (day 308), so it is not a usable x axis: the day-308 readings would be drawn
# first and the points would be spaced unevenly (950 -> 1000 is 10 minutes).
# Convert day + HHMM into hours elapsed since midnight of the first day.
beaver_ts <- transform(
  beaver2,
  hours = (day - min(day)) * 24 + time %/% 100 + (time %% 100) / 60
)

# `temp` over elapsed time: a line with a marker at every measurement.
gr_beaver <- ggplot(data = beaver_ts, aes(x = hours, y = temp)) +
  geom_line() +
  geom_point() +
  xlab("Время, ч от полуночи первых суток")

gr_beaver

ирисы

library(tidyverse)
library(ggplot2)
library(ggpubr)
## Warning: пакет 'ggpubr' был собран под R версии 4.2.3
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html
#https://r-graph-gallery.com
#https://r-statistics.co/Top50-Ggplot2-Visualizations-MasterList-R-Code.html
#install.packages("ggpubr")

# Per-column summary of iris: numeric quantiles plus row counts per Species.
summary(iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Drop setosa so that only versicolor and virginica are compared.
iris_s <- iris %>%
  filter(Species != "setosa")

# Sepal length by species: box plots with the raw observations jittered on top
# and the group-comparison label added by ggpubr::stat_compare_means().
gr_iris <- ggplot(
  data = iris_s,
  aes(x = Species, y = Sepal.Length, color = Species)
) +
  geom_boxplot() +
  geom_jitter(width = 0.25) +
  stat_compare_means() +
  xlab("Вид")

gr_iris

# Name the plot explicitly instead of relying on ggsave()'s default of
# "whatever plot was displayed last".
ggsave("gr_iris.png", plot = gr_iris, width = 6, height = 6)