Basic statistical functions on the data set from the notes: {x_i, i=1,…,7} = {3, 5, 8, 10, 15, 20, 21}

# Enter the seven observations as a numeric vector 'x'.
x <- c(3, 5, 8, 10, 15, 20, 21)
# Wrap 'x' in a data frame 'df' whose single column is named 'x'.
df <- data.frame(x = x)
# Show the first two rows of 'df'.
head(df, n = 2L)
##   x
## 1 3
## 2 5
# 'summary' reports min, quartiles, median, mean and max for each column.
# Note: the 1st and 3rd quartiles are the 25th and 75th percentiles as
# computed by quantile() with its default (interpolating) method.
summary(df)
##        x        
##  Min.   : 3.00  
##  1st Qu.: 6.50  
##  Median :10.00  
##  Mean   :11.71  
##  3rd Qu.:17.50  
##  Max.   :21.00
# Sample mean of column 'x' of 'df'.
xbar <- mean(df[["x"]])
# Sample standard deviation of column 'x' (sd() uses the n - 1 denominator).
s <- sd(df[["x"]])

Results printed in markdown: mean = 11.7142857, standard deviation = 7.1113591

# Change the default size of all subsequent plots (the repr.plot.* options are
# the ones read by the Jupyter R kernel's 'repr' display layer).
options(repr.plot.width = 5, repr.plot.height = 3.5)

# Histogram of column 'x' in the data frame.
hist(df$x, xlab = "x", main = "histogram", col = "lightblue")

# Boxplot of every column in the data frame, labelled with the column names.
boxplot(df, xlab = names(df))

# Empirical cumulative distribution function (ecdf).
# ecdf() builds the step function Fn(t) = (number of x_i <= t) / n from column
# 'x' of 'df' and saves it as 'Fn'. The argument is kept as 'df$x' because the
# call is stored on the object and used as the default plot title.
Fn <- ecdf(df$x)
# Plot the ecdf. Because Fn counts observations <= t, it is right-continuous:
# at each data point it already takes the upper value of the jump.
plot(Fn, col = "purple")