Last updated: 16:09:16 IST, 20 July, 2023
This page is an introduction for beginners to using R for statistics.
# Draw 10000 samples from a normal distribution with mean 0 and sd 1 using
# rnorm(), then show its summary, its histogram and its variance.
# Usage: rnorm(n, mean = 0, sd = 1); mean and sd default to 0 and 1.
data <- rnorm(n = 10000)
summary(data)
hist(data)
var(data)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.761723 -0.683332 -0.011130 -0.009164 0.658044 3.546918
## [1] 1.014744
# Draw 10000 samples from a normal distribution with mean 2 and sd 1 using
# rnorm(), then show its summary, its histogram and its variance.
data <- rnorm(n = 10000, mean = 2, sd = 1)
summary(data)
hist(data)
var(data)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.903 1.336 2.007 2.007 2.668 6.216
## [1] 0.9961883
# dnorm(x, mean = , sd = ) returns the probability density of a normal
# distribution evaluated at x (a density, not a probability).
# Build the grid of x values: from -6 to 6 in steps of 0.1.
sequence1 <- seq(-6, 6, by = 0.1)
# Evaluate the density of the normal distribution (mean 0, sd 0.5) on the grid.
pdf_values <- dnorm(sequence1, mean = 0, sd = 0.5)
# Plot density against value as a red line to see the bell-shaped curve.
plot(
  x = sequence1,
  y = pdf_values,
  type = "l",
  col = "red",
  main = "Normal Distribution (mean = 0,sd =0.5)",
  xlab = "value",
  ylab = "Density"
)
A one-sample t-test is used to find out whether the mean of a population equals a certain value, based on the mean of a sample drawn from that population.
# One-sample, two-sided t-test on 1000 draws generated with mean 0 and sd 1.
# Null hypothesis: true mean is equal to 0.
# Alternative hypothesis: true mean is not equal to 0.
data1 <- rnorm(1000, mean = 0, sd = 1)
# t.test(sample, mu=,alternative=,conf.level=)
ttest_res <- t.test(data1, mu = 0, alternative = "two.sided")
ttest_res$p.value
## [1] 0.6989273
# Compare the p value with the 0.05 significance level. Failing to reject the
# null hypothesis is not evidence that it is true, so the else branch only
# reports that a difference from 0 could not be shown.
if (ttest_res$p.value < 0.05) {
  print("Since p value is < 0.05, we have sufficient evidence to reject the null hypothesis (Mean value is equal to 0). Hence, true mean of the population is not 0")
} else {
  print("Since p value is not < 0.05, we do not have sufficient evidence to reject the null hypothesis (True Mean is equal to 0). Hence, we cannot conclude that the true mean of the population is different from 0")
}
## [1] "Since p value is not < 0.05, we do not have sufficient evidence to reject the null hypothesis (True Mean is equal to 0). Hence, we cannot conclude that the true mean of the population is different from 0"
# Same two-sided test (null hypothesis: true mean is equal to 0), this time on
# 1000 draws generated with mean 2 and sd 1.
data2 <- rnorm(1000, mean = 2, sd = 1)
ttest_res <- t.test(data2, mu = 0, alternative = "two.sided")
ttest_res$p.value
## [1] 0
# Compare the p value with the 0.05 significance level. Failing to reject the
# null hypothesis is not evidence that it is true, so the else branch only
# reports that a difference from 0 could not be shown.
if (ttest_res$p.value < 0.05) {
  print("Since p value is < 0.05, we have sufficient evidence to reject the null hypothesis (True Mean is equal to 0). Hence, true mean of the population is not 0")
} else {
  print("Since p value is not < 0.05, we do not have sufficient evidence to reject the null hypothesis (True Mean is equal to 0). Hence, we cannot conclude that the true mean of the population is different from 0")
}
## [1] "Since p value is < 0.05, we have sufficient evidence to reject the null hypothesis (True Mean is equal to 0). Hence, true mean of the population is not 0"
# One-sample, one-sided ("greater") t-test on 1000 draws generated with
# mean 0 and sd 1.
# Null hypothesis: true mean is not greater than 0.
# Alternative hypothesis: true mean is greater than 0.
# t.test(sample, mu=,alternative=,conf.level=)
data1 <- rnorm(1000, mean = 0, sd = 1)
ttest_res <- t.test(data1, mu = 0, alternative = "greater")
ttest_res
##
## One Sample t-test
##
## data: data1
## t = 0.9091, df = 999, p-value = 0.1818
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
## -0.02307898 Inf
## sample estimates:
## mean of x
## 0.02845767
ttest_res$p.value
## [1] 0.1817574
# Compare the p value with the 0.05 significance level. Failing to reject the
# null hypothesis is not evidence that it is true, so the else branch only
# reports that "greater than 0" could not be shown.
if (ttest_res$p.value < 0.05) {
  print("Since p value is < 0.05, we have sufficient evidence to reject the Null Hypothesis (True Mean is not greater than 0). Hence, true mean of the population is greater than 0")
} else {
  print("Since p value is not < 0.05, we do not have sufficient evidence to reject the Null Hypothesis (True mean is not greater than 0). Hence, we cannot conclude that the true mean of the population is greater than 0")
}
## [1] "Since p value is not < 0.05, we do not have sufficient evidence to reject the Null Hypothesis (True mean is not greater than 0). Hence, we cannot conclude that the true mean of the population is greater than 0"
# Same one-sided ("greater") test (null hypothesis: true mean is not greater
# than 0), this time on 1000 draws generated with mean 2 and sd 1.
data2 <- rnorm(1000, mean = 2, sd = 1)
ttest_res <- t.test(data2, mu = 0, alternative = "greater")
ttest_res
##
## One Sample t-test
##
## data: data2
## t = 62.816, df = 999, p-value < 2.2e-16
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
## 1.907623 Inf
## sample estimates:
## mean of x
## 1.958966
ttest_res$p.value
## [1] 0
# Compare the p value with the 0.05 significance level. Failing to reject the
# null hypothesis is not evidence that it is true, so the else branch only
# reports that "greater than 0" could not be shown.
if (ttest_res$p.value < 0.05) {
  print("Since p value is < 0.05, we have sufficient evidence to reject the Null Hypothesis (True Mean is not greater than 0). Hence, true mean of the population is greater than 0")
} else {
  print("Since p value is not < 0.05, we do not have sufficient evidence to reject the Null Hypothesis (True mean is not greater than 0). Hence, we cannot conclude that the true mean of the population is greater than 0")
}
## [1] "Since p value is < 0.05, we have sufficient evidence to reject the Null Hypothesis (True Mean is not greater than 0). Hence, true mean of the population is greater than 0"
# One-sample, one-sided ("less") t-test on 1000 draws generated with
# mean 0 and sd 1.
# Null hypothesis: true mean is not less than 0.
# Alternative hypothesis: true mean is less than 0.
# t.test(sample, mu=,alternative=,conf.level=)
data1 <- rnorm(1000, mean = 0, sd = 1)
ttest_res <- t.test(data1, mu = 0, alternative = "less")
mean(data1)
## [1] 0.04316548
ttest_res$p.value
## [1] 0.9167532
# Compare the p value with the 0.05 significance level. Failing to reject the
# null hypothesis is not evidence that it is true, so the else branch only
# reports that "less than 0" could not be shown.
if (ttest_res$p.value < 0.05) {
  print("Since p value is < 0.05, we have sufficient evidence to reject the Null Hypothesis (True Mean is not less than 0). Hence, true mean of the population is less than 0")
} else {
  print("Since p value is not < 0.05, we do not have sufficient evidence to reject the Null Hypothesis (true mean is not less than 0). Hence, we cannot conclude that the true mean of the population is less than 0")
}
## [1] "Since p value is not < 0.05, we do not have sufficient evidence to reject the Null Hypothesis (true mean is not less than 0). Hence, we cannot conclude that the true mean of the population is less than 0"
# Same one-sided ("less") test (null hypothesis: true mean is not less than 0),
# this time on 1000 draws generated with mean -5 and sd 1.
data2 <- rnorm(1000, mean = -5, sd = 1)
ttest_res <- t.test(data2, mu = 0, alternative = "less")
mean(data2)
## [1] -4.994351
ttest_res$p.value
## [1] 0
# Compare the p value with the 0.05 significance level. Failing to reject the
# null hypothesis is not evidence that it is true, so the else branch only
# reports that "less than 0" could not be shown.
if (ttest_res$p.value < 0.05) {
  print("Since p value is < 0.05, we have sufficient evidence to reject the Null Hypothesis (True Mean is not less than 0). Hence, true mean of the population is less than 0")
} else {
  print("Since p value is not < 0.05, we do not have sufficient evidence to reject the Null Hypothesis (true mean is not less than 0). Hence, we cannot conclude that the true mean of the population is less than 0")
}
## [1] "Since p value is < 0.05, we have sufficient evidence to reject the Null Hypothesis (True Mean is not less than 0). Hence, true mean of the population is less than 0"