# Load the tablet quality-control data set.
# NOTE(review): the path below is machine-specific; adjust it when running
# the script on another computer.
dat1 <- read.csv(
  "C:/Users/Lenovo/Downloads/tablet quality control.csv",
  header = TRUE
)
head(dat1)
# Extract the columns explicitly instead of attach()-ing the data frame:
# attach() hides where a name comes from, and re-running the script would
# stack another copy of dat1 on the search path. Every column that attach()
# exposed is still available by its bare name.
ID <- dat1$ID
Company <- as.factor(dat1$Company)
Conformity <- as.factor(dat1$Conformity)
Concentration <- dat1$Concentration
# Check the structure of the dataset. dat1 itself is not modified, so its
# Company and Conformity columns are still reported as character here; only
# the extracted copies above are factors.
str(dat1)
## 'data.frame': 100 obs. of 4 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Company : chr "X" "X" "X" "X" ...
## $ Conformity : chr "Yes" "Yes" "Yes" "Yes" ...
## $ Concentration: num 100.7 98.5 103.8 99.6 97.1 ...
summary(dat1)
## ID Company Conformity Concentration
## Min. : 1.00 Length:100 Length:100 Min. : 90.40
## 1st Qu.: 25.75 Class :character Class :character 1st Qu.: 94.47
## Median : 50.50 Mode :character Mode :character Median : 97.50
## Mean : 50.50 Mean : 97.62
## 3rd Qu.: 75.25 3rd Qu.:100.15
## Max. :100.00 Max. :106.90
# 3.2 Probability of event
# Cross-tabulate company against conformity; each cell of the table holds
# the number of tablets with that company / conformity combination.
tab <- table(Company, Conformity)
tab
## Conformity
## Company No Yes
## X 26 24
## Y 12 38
# Show the same table with row totals and column totals appended
# (the "Sum" margins).
addmargins(tab)
## Conformity
## Company No Yes Sum
## X 26 24 50
## Y 12 38 50
## Sum 38 62 100
# Events: E1 = tablet comes from company X, E2 = tablet conforms ("Yes").
# Every probability is a relative frequency read from the contingency table
# `tab`, so the figures follow the data instead of being typed in by hand.
n_total <- sum(tab)
# Probability of E1
PrE1 <- sum(tab["X", ]) / n_total
PrE1
## [1] 0.5
# Probability of E2
PrE2 <- sum(tab[, "Yes"]) / n_total
PrE2
## [1] 0.62
# Probability of the union of E1 and E2 (E1 or E2), by the addition rule:
# P(E1) + P(E2) - P(E1 and E2)
PrE12 <- PrE1 + PrE2 - tab["X", "Yes"] / n_total
PrE12
## [1] 0.88
# Probability of the union of the complements of E1 and E2, again by the
# addition rule. The subtracted term is P(not E1 and not E2): tablets that
# are neither from company X nor conforming.
PrE1C <- 1 - PrE1
PrE2C <- 1 - PrE2
n_neither <- sum(tab[rownames(tab) != "X", colnames(tab) != "Yes"])
PrE1C2C <- PrE1C + PrE2C - n_neither / n_total
PrE1C2C
## [1] 0.76
# 3.3 Binomial Distribution
# Model: number of conforming tablets in a sample of 10, where each tablet
# conforms with probability PrE2.
# Probability of observing exactly 8 conforming ("Yes") tablets
dbinom(8, size = 10, prob = PrE2)
## [1] 0.1418774
# Probability of observing 8 or fewer conforming tablets
pbinom(8, size = 10, prob = PrE2)
## [1] 0.9401661
# Plot the binomial probabilities for 0 to 10 conforming tablets as
# vertical spikes (type = "h").
x <- 0:10
plot(
  x, dbinom(x, size = 10, prob = PrE2),
  type = "h", lwd = 3,
  xlab = "Number of conforming tablets",
  ylab = "Binomial Distribution of conforming tablets"
)
# 3.4 Normal Distribution
# Draw three diagnostic plots side by side. par() returns the previous
# settings, which are restored afterwards so later plots are not forced
# into the 1 x 3 layout.
old_par <- par(mfrow = c(1, 3))
# Histogram
hist(
  Concentration,
  col = "blue",
  main = "Histogram of Tablet Concentration",
  xlab = "Concentration"
)
# Density curve
plot(
  density(Concentration),
  main = "Density Curve of Tablet Concentration",
  xlab = "Concentration",
  ylab = "Density",
  lwd = 2
)
# Q-Q plot with its reference line
qqnorm(Concentration, main = "Q-Q Plot of Tablet Concentration")
qqline(Concentration, col = "red", lwd = 2)
par(old_par)
# Normal model for concentration, using the sample mean and sample standard
# deviation as its parameters (computed once and reused below).
conc_mean <- mean(Concentration)
conc_sd <- sd(Concentration)
# The probability that concentration is less than 98
pnorm(98, mean = conc_mean, sd = conc_sd)
## [1] 0.5383799
# The probability that concentration is greater than 102
pnorm(102, mean = conc_mean, sd = conc_sd, lower.tail = FALSE)
## [1] 0.1310622
# The probability that concentration is between 98 and 102:
# P(X < 102) - P(X < 98). The two cumulative probabilities are shown first,
# then their difference, which is the answer.
pnorm(102, mean = conc_mean, sd = conc_sd)
## [1] 0.8689378
pnorm(98, mean = conc_mean, sd = conc_sd)
## [1] 0.5383799
pnorm(102, mean = conc_mean, sd = conc_sd) -
  pnorm(98, mean = conc_mean, sd = conc_sd)
# From the two values above: 0.8689378 - 0.5383799, i.e. about 0.3306
# Find the concentration value that cuts off the upper 5% (95th percentile)
qnorm(0.95, mean = conc_mean, sd = conc_sd)
## [1] 104.0427
# 3.5 Confidence Interval for the Mean
# 95% t-based confidence interval for the mean concentration, computed
# separately for each company.
mean_conc <- tapply(Concentration, Company, mean)
mean_conc
## X Y
## 97.186 98.062
sd_conc <- tapply(Concentration, Company, sd)
sd_conc
## X Y
## 4.463183 3.233491
n_conc <- tapply(Concentration, Company, length)
# Half-width of the interval: t quantile with n - 1 degrees of freedom times
# the standard error sd / sqrt(n). Both limits use the same half-width, so
# the interval is symmetric about the mean.
margin_conc <- qt(0.975, df = n_conc - 1) * (sd_conc / sqrt(n_conc))
lower_CI <- mean_conc - margin_conc
lower_CI
# By symmetry with the upper limits below (2 * mean - upper), the lower
# limits are about 95.9176 for X and 97.1431 for Y.
upper_CI <- mean_conc + margin_conc
upper_CI
## X Y
## 98.45442 98.98095
# Combine the per-company results into one summary table.
# SE is the standard error of the mean, sd / sqrt(n), not the standard
# deviation itself.
# NOTE(review): the last two column names end in a lowercase "l"
# ("95Cl"), presumably meant as "95CI"; they are left unchanged so that any
# existing reference to these columns keeps working.
ci_table <- data.frame(
  Company = names(mean_conc),
  n = as.vector(n_conc),
  Mean = as.vector(mean_conc),
  SD = as.vector(sd_conc),
  SE = as.vector(sd_conc / sqrt(n_conc)),
  Lower_95Cl = as.vector(lower_CI),
  Upper_95Cl = as.vector(upper_CI)
)
ci_table