R

Link: http://www.r-project.org

To download R, visit https://cloud.r-project.org

# R as a calculator: each expression is evaluated and its value is echoed
# on the following "## [1]" line.
9 / 44
## [1] 0.2045455
# Multiplication is applied before addition: 0.24 + 0.18.
0.6 * 0.4 + 0.3 * 0.6
## [1] 0.42
# log() with a single argument is the natural logarithm (here, of 0.42).
log(0.6 * 0.4 + 0.3 * 0.6)
## [1] -0.8675006

Introduction: https://cran.r-project.org/doc/manuals/r-release/R-intro.pdf

Data

Data in R

# Called with no arguments, data() lists the data sets that are available.
data()
# Open the help page for the built-in airquality data set.
?airquality
## starting httpd help server ... done
# head() shows the first rows (six by default); NA marks a missing value.
head(airquality)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 5    NA      NA 14.3   56     5   5
## 6    28      NA 14.9   66     5   6
# tail() shows the last rows; the data set ends at row 153.
tail(airquality)
##     Ozone Solar.R Wind Temp Month Day
## 148    14      20 16.6   63     9  25
## 149    30     193  6.9   70     9  26
## 150    NA     145 13.2   77     9  27
## 151    14     191 14.3   75     9  28
## 152    18     131  8.0   76     9  29
## 153    20     223 11.5   68     9  30
# The n argument controls how many rows are shown.
head(airquality, n = 10)
##    Ozone Solar.R Wind Temp Month Day
## 1     41     190  7.4   67     5   1
## 2     36     118  8.0   72     5   2
## 3     12     149 12.6   74     5   3
## 4     18     313 11.5   62     5   4
## 5     NA      NA 14.3   56     5   5
## 6     28      NA 14.9   66     5   6
## 7     23     299  8.6   65     5   7
## 8     19      99 13.8   59     5   8
## 9      8      19 20.1   61     5   9
## 10    NA     194  8.6   69     5  10
# Data frames are indexed as [row, column]: row 148, column 4 (Temp).
airquality[148,4]
## [1] 63
# Same value: extract the Temp column with $, then take its 148th element.
airquality$Temp[148]
## [1] 63
# Leaving the column index empty returns the whole row.
airquality[148,]
##     Ozone Solar.R Wind Temp Month Day
## 148    14      20 16.6   63     9  25
# Rows 1 to 10 of columns 1 and 4 (Ozone and Temp).
airquality[1:10,c(1,4)]
##    Ozone Temp
## 1     41   67
## 2     36   72
## 3     12   74
## 4     18   62
## 5     NA   56
## 6     28   66
## 7     23   65
## 8     19   59
## 9      8   61
## 10    NA   69
# Six-number summary (min, quartiles, median, mean, max) of the Temp column.
summary(airquality$Temp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   56.00   72.00   79.00   77.88   85.00   97.00
# Histogram of the Temp column.
hist(airquality$Temp)

# A single numeric vector is plotted against its index (row order).
plot(airquality$Temp)

# Scatter plot with Ozone on the x-axis and Temp on the y-axis.
plot(airquality$Ozone, airquality$Temp)

# Plotting the whole data frame draws a scatter plot for every pair of columns.
plot(airquality)

Box plot

Probability & Statistics (P&S)

Probability – the study of models for (random) experiments when the model is fully known

Statistics – inference about the unknown aspects of the model, based on observed outcomes of the experiment (when the model is not fully known)

Let us start with probability distributions!

# A small numeric sample: 13 values, each equal to 1, 2 or 3.
xn <- c(1, 2, 2, 3, 1, 2, 3, 3, 1, 2, 3, 3, 1)

# factor() re-encodes the same values as categories; the distinct values
# become the levels.
factorxn <- factor(xn)
factorxn
##  [1] 1 2 2 3 1 2 3 3 1 2 3 3 1
## Levels: 1 2 3
# The numeric vector has an ordinary arithmetic mean (27 / 13).
mean(xn)
## [1] 2.076923
# A factor holds categories rather than numbers, so mean() warns and
# returns NA. This call is kept on purpose to show that behaviour.
mean(factorxn)
## Warning in mean.default(factorxn): argument is not numeric or logical:
## returning NA
## [1] NA
# Month names as plain character data; values may repeat.
xc <- c(
  "June", "July", "August", "September",
  "August", "July", "July", "August"
)

# Default factor: the levels are inferred from the values present in xc.
factorxc <- factor(xc)

# Ordered factor with an explicit level set. The levels are listed in
# calendar order; "Garbage" and the months absent from xc are declared
# levels that never occur in the data.
months <- factor(
  xc,
  levels = c(
    "Garbage", "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December"
  ),
  ordered = TRUE
)

# table() counts every declared level, so unused levels appear with count 0.
table(months)
## months
##   Garbage   January  February     March     April       May      June      July 
##         0         0         0         0         0         0         1         3 
##    August September   October  November  December 
##         3         1         0         0         0
# Re-applying factor() drops the unused levels, leaving only the months
# that actually occur (still in calendar order).
table(factor(months))
## 
##      June      July    August September 
##         1         3         3         1