This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print the per-column summary (min, quartiles, median, mean, max) of the
# built-in `cars` data set.
print(summary(cars))
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
Question:1.i Aim: draw a histogram and boxplot with title and labels of given data
# Student weights (55 observations) analysed throughout Question 1.
weights <- c(
  42, 74, 40, 60, 82, 115, 41, 61, 75, 83, 63,
  53, 110, 76, 84, 50, 67, 65, 78, 77, 56, 95,
  68, 69, 104, 80, 79, 79, 54, 73, 59, 81, 100,
  66, 49, 77, 90, 84, 76, 42, 64, 69, 70, 80,
  72, 50, 79, 52, 103, 96, 51, 86, 78, 94, 71
)
# Histogram of the weights. `breaks = 10` is only a suggested number of bins;
# hist() may adjust it to obtain "pretty" break points.
hist(
  weights,
  breaks = 10,
  col = "blue",
  border = "black",
  main = "Histogram of Student Weights",
  xlab = "Weight",
  ylab = "Frequency"
)

# Box plot of the same data. boxplot() draws vertically by default, so the
# weights lie along the y-axis and the label belongs in `ylab`. The label is
# written on a single line so it no longer contains a stray newline.
boxplot(
  weights,
  col = "green",
  main = "Boxplot of Student Weights",
  ylab = "Weight"
)
Question:1.ii Aim: To prepare a cumulative frequency table, as a data frame, for the given data
# Frequency table of the distinct weights as a data frame (one row per
# distinct value), extended with a running total of the counts.
weights_table <- data.frame(table(weights))
weights_table[["Cumulative_Frequency"]] <- cumsum(weights_table[["Freq"]])
names(weights_table) <- c("Weight", "Frequency", "Cumulative Frequency")
print(weights_table)
## Weight Frequency Cumulative Frequency
## 1 40 1 1
## 2 41 1 2
## 3 42 2 4
## 4 49 1 5
## 5 50 2 7
## 6 51 1 8
## 7 52 1 9
## 8 53 1 10
## 9 54 1 11
## 10 56 1 12
## 11 59 1 13
## 12 60 1 14
## 13 61 1 15
## 14 63 1 16
## 15 64 1 17
## 16 65 1 18
## 17 66 1 19
## 18 67 1 20
## 19 68 1 21
## 20 69 2 23
## 21 70 1 24
## 22 71 1 25
## 23 72 1 26
## 24 73 1 27
## 25 74 1 28
## 26 75 1 29
## 27 76 2 31
## 28 77 2 33
## 29 78 2 35
## 30 79 3 38
## 31 80 2 40
## 32 81 1 41
## 33 82 1 42
## 34 83 1 43
## 35 84 2 45
## 36 86 1 46
## 37 90 1 47
## 38 94 1 48
## 39 95 1 49
## 40 96 1 50
## 41 100 1 51
## 42 103 1 52
## 43 104 1 53
## 44 110 1 54
## 45 115 1 55
Question:1.iii Aim:To find mean and mode of the given data
# Arithmetic mean of the weights.
mean_weight <- mean(weights)
print(mean_weight)
## [1] 72.58182
# Count of each distinct weight; the mode is read off this table.
xr <- table(weights)
print(xr)
## weights
## 40 41 42 49 50 51 52 53 54 56 59 60 61 63 64 65 66 67 68 69
## 1 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2
## 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 86 90 94 95 96
## 1 1 1 1 1 1 2 2 2 3 2 1 1 1 2 1 1 1 1 1
## 100 103 104 110 115
## 1 1 1 1 1
# Mode = the weight value(s) with the highest frequency in `xr`.
# which(xr == max(xr)) would store the *position* of that entry in the table
# (30) and carry the actual weight (79) only as a name, so the value is taken
# from the table's names instead. If several weights tie for the highest
# count, all of them are returned.
# NOTE: the variable name `mode` is kept as in the original script; it has the
# same name as base R's mode() function.
mode <- as.numeric(names(xr)[xr == max(xr)])
mode
## [1] 79
Question:2.i Aim: To find the mean value of the given data
# Values 5, 10, ..., 60 of the variable (named `mid`; presumably the class
# mid-points of a grouped distribution).
mid <- seq(from = 5, to = 60, by = 5)
print(mid)
## [1] 5 10 15 20 25 30 35 40 45 50 55 60
# Frequency attached to each value, in the same order as `mid`.
f <- c(12, 28, 65, 121, 175, 198, 176, 120, 66, 27, 9, 3)
fr_dist <- data.frame(mid = mid, f = f)
print(fr_dist)
## mid f
## 1 5 12
## 2 10 28
## 3 15 65
## 4 20 121
## 5 25 175
## 6 30 198
## 7 35 176
## 8 40 120
## 9 45 66
## 10 50 27
## 11 55 9
## 12 60 3
# Mean of the frequency distribution: frequency-weighted average of `mid`.
mean <- sum(mid * f) / sum(f)
print(mean)
## [1] 30.005
Question:2.ii Aim:to find the variance of given data
# Variance of the frequency distribution: frequency-weighted mean of the
# squared deviations from `mean`, dividing by the total frequency.
variance <- sum(fr_dist$f * (fr_dist$mid - mean)^2) / sum(fr_dist$f)
# The aim of this question is the variance, so report it explicitly rather
# than only its square root.
print(variance)
## [1] 100.175
# Standard deviation; reused for the coefficient of variation.
sigma_value <- sqrt(variance)
print(sigma_value)
## [1] 10.00874
Question:2.iii Aim:to find the coefficient of variation
# Coefficient of variation: the standard deviation expressed as a percentage
# of the mean.
cv <- sigma_value / mean * 100
print(cv)
## [1] 33.35692
Result: Mean = 30.005, Variance = 100.175 (Standard Deviation = 10.00874), Coefficient of Variation = 33.36%
Question:3.i Aim: To find the quartile deviation for Sepal.Length parameter in iris dataset
# Work on a copy of the built-in iris data set and preview its first six rows.
data <- iris
head(data, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Quartile deviation (semi-interquartile range) of Sepal.Length: (Q3 - Q1) / 2.
Q1 <- quantile(iris$Sepal.Length, 0.25)
Q3 <- quantile(iris$Sepal.Length, 0.75)
# The difference Q3 - Q1 inherits the "75%" name from Q3, which would mislabel
# the quartile deviation when printed, so the name is dropped.
qd <- unname((Q3 - Q1) / 2)
qd
## [1] 0.65
Question:3.ii Aim:To find the coefficient of quartile deviation for Petal.Width
# Coefficient of quartile deviation of Petal.Width: (Q3 - Q1) / (Q3 + Q1).
Q1_pw <- quantile(iris$Petal.Width, 0.25)
Q3_pw <- quantile(iris$Petal.Width, 0.75)
# The ratio inherits the "75%" name from Q3_pw, which would mislabel the
# coefficient when printed, so the name is dropped.
cqd <- unname((Q3_pw - Q1_pw) / (Q3_pw + Q1_pw))
cqd
## [1] 0.7142857
Question:3.iii Aim:To draw a pie chart with title and legend Species
# Number of observations per species, drawn as a pie chart with a legend that
# uses the same colours as the slices.
species_count <- table(iris$Species)
slice_colours <- rainbow(length(species_count))
pie(
  species_count,
  col = slice_colours,
  main = "Species Distribution in Iris Dataset"
)
legend("topright", legend = names(species_count), fill = slice_colours)
Question:4.i Aim: To find the variance of the Freq column of the Titanic dataset
# Point `data` at the built-in Titanic contingency table and preview it.
data <- Titanic
print(head(data))
## , , Age = Child, Survived = No
##
## Sex
## Class Male Female
## 1st 0 0
## 2nd 0 0
## 3rd 35 17
## Crew 0 0
##
## , , Age = Adult, Survived = No
##
## Sex
## Class Male Female
## 1st 118 4
## 2nd 154 13
## 3rd 387 89
## Crew 670 3
##
## , , Age = Child, Survived = Yes
##
## Sex
## Class Male Female
## 1st 5 1
## 2nd 11 13
## 3rd 13 14
## Crew 0 0
##
## , , Age = Adult, Survived = Yes
##
## Sex
## Class Male Female
## 1st 57 140
## 2nd 14 80
## 3rd 75 76
## Crew 192 20
# Flatten the Titanic table into a data frame: one row per combination of
# Class, Sex, Age and Survived, with the cell count in `Freq`.
titanic_df <- data.frame(Titanic)
print(titanic_df)
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
## 7 3rd Female Child No 17
## 8 Crew Female Child No 0
## 9 1st Male Adult No 118
## 10 2nd Male Adult No 154
## 11 3rd Male Adult No 387
## 12 Crew Male Adult No 670
## 13 1st Female Adult No 4
## 14 2nd Female Adult No 13
## 15 3rd Female Adult No 89
## 16 Crew Female Adult No 3
## 17 1st Male Child Yes 5
## 18 2nd Male Child Yes 11
## 19 3rd Male Child Yes 13
## 20 Crew Male Child Yes 0
## 21 1st Female Child Yes 1
## 22 2nd Female Child Yes 13
## 23 3rd Female Child Yes 14
## 24 Crew Female Child Yes 0
## 25 1st Male Adult Yes 57
## 26 2nd Male Adult Yes 14
## 27 3rd Male Adult Yes 75
## 28 Crew Male Adult Yes 192
## 29 1st Female Adult Yes 140
## 30 2nd Female Adult Yes 80
## 31 3rd Female Adult Yes 76
## 32 Crew Female Adult Yes 20
# Sample variance of the Freq counts (var() uses the n - 1 denominator).
variance_freq <- var(titanic_df[["Freq"]])
print(variance_freq)
## [1] 18494.89
Question:4.ii Aim: To find the 5th decile of Freq
# 5th decile of the Freq counts, i.e. the 0.5 quantile (the median).
decile_5 <- quantile(titanic_df[["Freq"]], probs = 0.5)
print(decile_5)
## 50%
## 13.5
Question:4.iii Aim: To find the 29th percentile of Freq
# 29th percentile of the Freq counts, i.e. the 0.29 quantile.
percentile_29 <- quantile(titanic_df[["Freq"]], probs = 0.29)
print(percentile_29)
## 29%
## 2.98
Result: 5th decile=13.5, 29th percentile=2.98