# Set the knitr chunk option `echo` to TRUE so the R source of each chunk is
# shown alongside its output in the rendered report.
knitr::opts_chunk$set(echo = TRUE)

Load the ToothGrowth data and perform some basic exploratory data analyses.

# Install plotly only when it is missing, so re-knitting the report does not
# download the package again on every run. The CRAN mirror is reached over
# HTTPS rather than plain HTTP.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly", repos = "https://cran.us.r-project.org")
}

## 
## The downloaded binary packages are in
##  /var/folders/4z/9kr7k7jd1q7gc792xjkrwf040000gn/T//RtmpEB5zTi/downloaded_packages

library(plotly)

## Loading required package: ggplot2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Column-wise summary of the ToothGrowth data frame (len, supp, dose).
summary(ToothGrowth)

##       len        supp         dose      
##  Min.   : 4.20   OJ:30   Min.   :0.500  
##  1st Qu.:13.07   VC:30   1st Qu.:0.500  
##  Median :19.25           Median :1.000  
##  Mean   :18.81           Mean   :1.167  
##  3rd Qu.:25.27           3rd Qu.:2.000  
##  Max.   :33.90           Max.   :2.000

# Tooth length against dose with a smoothed trend, one panel per supplement
# type. `text` is not a ggplot2 aesthetic; it is supplied so that ggplotly()
# can show the supplement in the hover tooltip.
p <- ggplot(data = ToothGrowth, aes(x = dose, y = len)) +
  geom_smooth(aes(colour = supp, fill = supp)) +
  facet_wrap(~ supp) +
  # alpha is a fixed transparency, so it is set outside aes(); inside aes() it
  # would be mapped as data and add a meaningless legend entry.
  geom_point(aes(text = paste("Supp:", supp)), alpha = 0.2)
ggplotly(p)

## `geom_smooth()` using method = 'loess'

Computing confidence intervals for two continuous attributes

# Normal-approximation confidence interval for the mean of a numeric vector:
# mean(v) +/- z * sd(v) / sqrt(n). Every term is taken from the same vector,
# so the standard error of one variable cannot be applied to another.
#   v      numeric vector
#   level  confidence level (default 0.95)
# Returns c(lower, upper).
mean_ci <- function(v, level = 0.95) {
  z <- qnorm(1 - (1 - level) / 2)
  mean(v) + c(-1, 1) * z * sd(v) / sqrt(length(v))
}

# NOTE(review): both intervals are divided by 12, which looks like a leftover
# unit conversion from another example -- confirm it is intended for this data.
x <- ToothGrowth$dose
ci_dose <- mean_ci(x) / 12
ci_dose

## [1] 0.08396191 0.11048253

y <- ToothGrowth$len
# The interval for `len` must use the standard deviation and sample size of
# `len` itself, not those of `dose`.
ci_len <- mean_ci(y) / 12
ci_len

## [1] 1.406485 1.729070

Summary

# Per-supplement summary of tooth length: group size, mean, median and
# standard deviation. The dplyr functions are namespace-qualified because the
# document attaches plotly but not dplyr; in particular an unqualified n() is
# not found unless dplyr is attached.
group_by_supp <- dplyr::group_by(ToothGrowth, supp)
summary <- dplyr::summarise(
  group_by_supp,
  count = dplyr::n(),
  mean = mean(len),
  median = median(len),
  "standard deviation" = sd(len)
)
# Convert to a plain data frame for printing.
summary_data <- as.data.frame(summary)
summary_data

##   supp count     mean median standard deviation
## 1   OJ    30 20.66333   22.7           6.605561
## 2   VC    30 16.96333   16.5           8.266029

# Summary of tooth length for every supplement/dose combination: group size,
# mean, median and standard deviation. The dplyr functions are
# namespace-qualified because the document attaches plotly but not dplyr; in
# particular an unqualified n() is not found unless dplyr is attached.
group_by_supp <- dplyr::group_by(ToothGrowth, supp, dose)
summary <- dplyr::summarise(
  group_by_supp,
  count = dplyr::n(),
  mean = mean(len),
  median = median(len),
  "standard deviation" = sd(len)
)
# Convert to a plain data frame for printing (this also drops the grouping).
summary_data <- as.data.frame(summary)
summary_data

##   supp dose count  mean median standard deviation
## 1   OJ  0.5    10 13.23  12.25           4.459709
## 2   OJ  1.0    10 22.70  23.45           3.910953
## 3   OJ  2.0    10 26.06  25.95           2.655058
## 4   VC  0.5    10  7.98   7.15           2.746634
## 5   VC  1.0    10 16.77  16.50           2.515309
## 6   VC  2.0    10 26.14  25.95           4.797731

Performing hypothesis tests

# Welch two-sample t-test of tooth length by supplement type. The two groups
# are independent, which is what the formula interface tests by default, so no
# `paired` argument is passed: it is redundant here, and R 4.4.0 and later
# reject `paired` in the formula method.
t.test(len ~ supp, data = ToothGrowth)

## 
##  Welch Two Sample t-test
## 
## data:  len by supp
## t = 1.9153, df = 55.309, p-value = 0.06063
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1710156  7.5710156
## sample estimates:
## mean in group OJ mean in group VC 
##         20.66333         16.96333

Bootstrapping

# Install boot only when it is missing, so re-knitting the report does not
# download the package again on every run. The CRAN mirror is reached over
# HTTPS rather than plain HTTP.
if (!requireNamespace("boot", quietly = TRUE)) {
  install.packages("boot", repos = "https://cran.us.r-project.org")
}

## 
## The downloaded binary packages are in
##  /var/folders/4z/9kr7k7jd1q7gc792xjkrwf040000gn/T//RtmpEB5zTi/downloaded_packages

# Bootstrap 95% CI for R-Squared
library(boot)
# Statistic for boot(): R-squared of a linear model fitted to one resample.
#   formula  model formula passed to lm()
#   data     the full data frame
#   indices  row indices selecting the resample
# Returns the R-squared of the fitted model as a single number.
rsq <- function(formula, data, indices) {
  d <- data[indices, ] # allows boot to select sample
  fit <- lm(formula, data = d)
  # Use the full component name; `$r.square` only resolved to `r.squared`
  # through partial matching.
  summary(fit)$r.squared
}
# Bootstrap the R-squared of len ~ supp with 100 replications (R = 100).
# NOTE(review): 100 replicates is few for BCa intervals; consider raising R.
results <- boot(
  data = ToothGrowth,
  statistic = rsq,
  R = 100,
  formula = len ~ supp
)

# Print the bootstrap summary
results

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = ToothGrowth, statistic = rsq, R = 100, formula = len ~ 
##     supp)
## 
## 
## Bootstrap Statistics :
##       original     bias    std. error
## t1* 0.05948365 0.02312685  0.06776327

# Plot the bootstrap replicates
plot(results)

# BCa bootstrap confidence interval at boot.ci()'s default level
boot.ci(results, type = "bca")

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 100 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = results, type = "bca")
## 
## Intervals : 
## Level       BCa          
## 95%   ( 0.0000,  0.2176 )  
## Calculations and Intervals on Original Scale
## Some BCa intervals may be unstable

The plot shows how the type of supplement and the dose taken affect tooth growth: growth improves as the dose increases, and at the highest dose the VC and OJ supplements reach approximately the same final length.

Statistics Project

Load the ToothGrowth data and perform some basic exploratory data analyses.

Computing confidence intervals for two continuous attributes

Summary

Performing hypothesis tests

Bootstrapping