Introduction to using R for statistics

Last updated: 16:09:16 IST, 20 July, 2023

This page is an introduction to using R for statistics, aimed at beginners.

Normal Distribution

# Draw 10000 observations from a normal distribution with mean 0 and sd 1,
# then show their summary statistics, histogram and sample variance.
# Signature: rnorm(n, mean = 0, sd = 1). The mean and sd arguments are spelled
# out below even though they equal the defaults.
data <- rnorm(n = 10000, mean = 0, sd = 1)
summary(data)
hist(data)
var(data)

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -3.761723 -0.683332 -0.011130 -0.009164  0.658044  3.546918

## [1] 1.014744

# Repeat the exercise with the distribution centred at 2 (sd stays at 1):
# sample 10000 values with rnorm(), then print the summary, draw the
# histogram and print the sample variance.
data <- rnorm(n = 10000, mean = 2, sd = 1)
summary(data)
hist(data)
var(data)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -2.903   1.336   2.007   2.007   2.668   6.216

## [1] 0.9961883

# dnorm(x, mean = , sd = ) returns the value of the normal probability density
# function at x. For a continuous distribution this is a density, not the
# probability of observing exactly x.

# Grid of x values running from -6 to 6 in increments of 0.1
sequence1 <- seq(-6, 6, by = 0.1)

# Density of the normal distribution (mean 0, sd 0.5) at every grid point
pdf_values <- dnorm(sequence1, mean = 0, sd = 0.5)

# Draw the density against the grid as a red line to see the bell curve
plot(
  x = sequence1,
  y = pdf_values,
  type = "l",
  col = "red",
  xlab = "value",
  ylab = "Density",
  main = "Normal Distribution (mean = 0,sd =0.5)"
)

Test of Means

One Sample, Two-sided Test

A one-sample, two-sided t-test checks whether the mean of a population is equal to a hypothesised value, based on the mean of a sample drawn from that population.

# One-sample, two-sided t-test on a sample whose generating mean equals the
# hypothesised mean.
#   H0: true mean is equal to 0      H1: true mean is not equal to 0

# 1000 draws from a normal distribution with mean 0 and sd 1
data1 <- rnorm(1000, mean = 0, sd = 1)

# t.test(sample, mu = , alternative = , conf.level = )
ttest_res <- t.test(data1, mu = 0, alternative = "two.sided")
ttest_res$p.value

## [1] 0.6989273

# Compare the p-value with the 0.05 threshold. A p-value that is not below
# 0.05 only means H0 could not be rejected; it does not show that H0 is true,
# so the else branch must not claim that the population mean is 0.
if (ttest_res$p.value < 0.05) {
  conclusion <- "Since p value is < 0.05, we have sufficient evidence to reject the null hypothesis (Mean value is equal to 0). Hence, true mean of the population is not 0"
} else {
  conclusion <- "Since p value is not < 0.05, we do not have sufficient evidence to reject the null hypothesis (True Mean is equal to 0). Hence, we cannot conclude that the true mean of the population is different from 0"
}
print(conclusion)

## [1] "Since p value is not < 0.05, we do not have sufficient evidence to reject the null hypothesis (True Mean is equal to 0). Hence, we cannot conclude that the true mean of the population is different from 0"

# One-sample, two-sided t-test on a sample whose generating mean (2) differs
# from the hypothesised mean (0).
#   H0: true mean is equal to 0      H1: true mean is not equal to 0

# 1000 draws from a normal distribution with mean 2 and sd 1
data2 <- rnorm(1000, mean = 2, sd = 1)
ttest_res <- t.test(data2, mu = 0, alternative = "two.sided")
ttest_res$p.value

## [1] 0

# Compare the p-value with the 0.05 threshold. Failing to reject H0 does not
# show that H0 is true, so the else branch only reports that no difference
# from 0 could be concluded.
if (ttest_res$p.value < 0.05) {
  conclusion <- "Since p value is < 0.05, we have sufficient evidence to reject the null hypothesis (True Mean is equal to 0). Hence, true mean of the population is not 0"
} else {
  conclusion <- "Since p value is not < 0.05, we do not have sufficient evidence to reject the null hypothesis (True Mean is equal to 0). Hence, we cannot conclude that the true mean of the population is different from 0"
}
print(conclusion)

## [1] "Since p value is < 0.05, we have sufficient evidence to reject the null hypothesis (True Mean is equal to 0). Hence, true mean of the population is not 0"

One Sample, One-sided Test (‘greater’)

# One-sample, one-sided ("greater") t-test on a sample generated with mean 0.
#   H0: true mean is not greater than 0      H1: true mean is greater than 0

# t.test(sample, mu = , alternative = , conf.level = )
data1 <- rnorm(1000, mean = 0, sd = 1)
ttest_res <- t.test(data1, mu = 0, alternative = "greater")
ttest_res

## 
##  One Sample t-test
## 
## data:  data1
## t = 0.9091, df = 999, p-value = 0.1818
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
##  -0.02307898         Inf
## sample estimates:
##  mean of x 
## 0.02845767

ttest_res$p.value

## [1] 0.1817574

# Compare the p-value with the 0.05 threshold. Failing to reject H0 does not
# show that H0 is true, so the else branch only reports that "greater than 0"
# could not be concluded.
if (ttest_res$p.value < 0.05) {
  conclusion <- "Since p value is < 0.05, we have sufficient evidence to reject the Null Hypothesis (True Mean  is not greater than 0). Hence, true mean of the population is greater than 0"
} else {
  conclusion <- "Since p value is not < 0.05, we do not have sufficient evidence to reject the Null Hypothesis (True mean is not greater than 0). Hence, we cannot conclude that the true mean of the population is greater than 0"
}
print(conclusion)

## [1] "Since p value is not < 0.05, we do not have sufficient evidence to reject the Null Hypothesis (True mean is not greater than 0). Hence, we cannot conclude that the true mean of the population is greater than 0"

# One-sample, one-sided ("greater") t-test on a sample generated with mean 2.
#   H0: true mean is not greater than 0      H1: true mean is greater than 0

data2 <- rnorm(1000, mean = 2, sd = 1)
ttest_res <- t.test(data2, mu = 0, alternative = "greater")
ttest_res

## 
##  One Sample t-test
## 
## data:  data2
## t = 62.816, df = 999, p-value < 2.2e-16
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
##  1.907623      Inf
## sample estimates:
## mean of x 
##  1.958966

ttest_res$p.value

## [1] 0

# Compare the p-value with the 0.05 threshold. Failing to reject H0 does not
# show that H0 is true, so the else branch only reports that "greater than 0"
# could not be concluded.
if (ttest_res$p.value < 0.05) {
  conclusion <- "Since p value is < 0.05, we have sufficient evidence to reject the Null Hypothesis (True Mean  is not greater than 0). Hence, true mean of the population is greater than 0"
} else {
  conclusion <- "Since p value is not < 0.05, we do not have sufficient evidence to reject the Null Hypothesis (True mean is not greater than 0). Hence, we cannot conclude that the true mean of the population is greater than 0"
}
print(conclusion)

## [1] "Since p value is < 0.05, we have sufficient evidence to reject the Null Hypothesis (True Mean  is not greater than 0). Hence, true mean of the population is greater than 0"

One Sample, One-sided Test (‘less’)

# One-sample, one-sided ("less") t-test on a sample generated with mean 0.
#   H0: true mean is not less than 0      H1: true mean is less than 0

# t.test(sample, mu = , alternative = , conf.level = )
data1 <- rnorm(1000, mean = 0, sd = 1)
ttest_res <- t.test(data1, mu = 0, alternative = "less")

# Show the sample mean next to the p-value of the test
mean(data1)
ttest_res$p.value

## [1] 0.04316548

## [1] 0.9167532

# Compare the p-value with the 0.05 threshold. Failing to reject H0 does not
# show that H0 is true, so the else branch only reports that "less than 0"
# could not be concluded.
if (ttest_res$p.value < 0.05) {
  conclusion <- "Since p value is < 0.05, we have sufficient evidence to reject the Null Hypothesis (True Mean is not less than 0). Hence, true mean of the population is less than 0"
} else {
  conclusion <- "Since p value is not < 0.05, we do not have sufficient evidence to reject the Null Hypothesis (true mean is not less than 0). Hence, we cannot conclude that the true mean of the population is less than 0"
}
print(conclusion)

## [1] "Since p value is not < 0.05, we do not have sufficient evidence to reject the Null Hypothesis (true mean is not less than 0). Hence, we cannot conclude that the true mean of the population is less than 0"

# One-sample, one-sided ("less") t-test on a sample generated with mean -5.
#   H0: true mean is not less than 0      H1: true mean is less than 0

data2 <- rnorm(1000, mean = -5, sd = 1)
ttest_res <- t.test(data2, mu = 0, alternative = "less")

# Show the sample mean next to the p-value of the test
mean(data2)
ttest_res$p.value

## [1] -4.994351

## [1] 0

# Compare the p-value with the 0.05 threshold. Failing to reject H0 does not
# show that H0 is true, so the else branch only reports that "less than 0"
# could not be concluded.
if (ttest_res$p.value < 0.05) {
  conclusion <- "Since p value is < 0.05, we have sufficient evidence to reject the Null Hypothesis (True Mean is not less than 0). Hence, true mean of the population is less than 0"
} else {
  conclusion <- "Since p value is not < 0.05, we do not have sufficient evidence to reject the Null Hypothesis (true mean is not less than 0). Hence, we cannot conclude that the true mean of the population is less than 0"
}
print(conclusion)

## [1] "Since p value is < 0.05, we have sufficient evidence to reject the Null Hypothesis (True Mean is not less than 0). Hence, true mean of the population is less than 0"