R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Per-column summary of `cars` (not defined in this document; presumably the
# built-in datasets::cars). The output below lists min, quartiles, median,
# mean and max for its two columns, speed and dist.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Question:1.i Aim: To draw a histogram and a boxplot, with titles and axis labels, for the given data

# Student weights (55 observations); reused by the rest of Question 1.
weights <- c(
  42, 74, 40, 60, 82, 115, 41, 61, 75, 83, 63,
  53, 110, 76, 84, 50, 67, 65, 78, 77, 56, 95,
  68, 69, 104, 80, 79, 79, 54, 73, 59, 81, 100,
  66, 49, 77, 90, 84, 76, 42, 64, 69, 70, 80,
  72, 50, 79, 52, 103, 96, 51, 86, 78, 94, 71
)

# Histogram of the weights; `breaks = 10` asks for roughly ten bins.
hist(
  weights,
  breaks = 10, col = "blue", border = "black",
  main = "Histogram of Student Weights",
  xlab = "Weight", ylab = "Frequency"
)

# Boxplot of the same data. The label is kept on one line: the original
# string literal was split across two lines, which embedded a newline
# character in the axis label.
boxplot(
  weights,
  col = "green",
  main = "Boxplot of Student Weights",
  xlab = "Weight"
)

Question:1.ii Aim: To prepare a cumulative frequency table as a data frame for the given data

# One row per distinct weight, with its count (column `Freq` at this point).
weights_table <- as.data.frame(table(weights))
# Running total of the counts, in ascending order of weight.
weights_table[["Cumulative_Frequency"]] <- cumsum(weights_table[["Freq"]])
# Human-readable headers for the printed table.
names(weights_table) <- c("Weight", "Frequency", "Cumulative Frequency")
weights_table
##    Weight Frequency Cumulative Frequency
## 1      40         1                    1
## 2      41         1                    2
## 3      42         2                    4
## 4      49         1                    5
## 5      50         2                    7
## 6      51         1                    8
## 7      52         1                    9
## 8      53         1                   10
## 9      54         1                   11
## 10     56         1                   12
## 11     59         1                   13
## 12     60         1                   14
## 13     61         1                   15
## 14     63         1                   16
## 15     64         1                   17
## 16     65         1                   18
## 17     66         1                   19
## 18     67         1                   20
## 19     68         1                   21
## 20     69         2                   23
## 21     70         1                   24
## 22     71         1                   25
## 23     72         1                   26
## 24     73         1                   27
## 25     74         1                   28
## 26     75         1                   29
## 27     76         2                   31
## 28     77         2                   33
## 29     78         2                   35
## 30     79         3                   38
## 31     80         2                   40
## 32     81         1                   41
## 33     82         1                   42
## 34     83         1                   43
## 35     84         2                   45
## 36     86         1                   46
## 37     90         1                   47
## 38     94         1                   48
## 39     95         1                   49
## 40     96         1                   50
## 41    100         1                   51
## 42    103         1                   52
## 43    104         1                   53
## 44    110         1                   54
## 45    115         1                   55

Question:1.iii Aim: To find the mean and mode of the given data

# Arithmetic mean of the weights.
mean_weight <- mean(weights)
mean_weight
## [1] 72.58182
# Frequency of each distinct weight; used below to locate the mode.
xr <- table(weights)
xr
## weights
##  40  41  42  49  50  51  52  53  54  56  59  60  61  63  64  65  66  67  68  69 
##   1   1   2   1   2   1   1   1   1   1   1   1   1   1   1   1   1   1   1   2 
##  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  86  90  94  95  96 
##   1   1   1   1   1   1   2   2   2   3   2   1   1   1   2   1   1   1   1   1 
## 100 103 104 110 115 
##   1   1   1   1   1
# Mode: the weight value(s) with the highest frequency in `xr`.
# `which()` on its own yields the position inside the table (30) and keeps
# the weight only as an element name, so the weight is taken from the names
# and converted back to a number. Ties produce more than one value.
# The name `mode` masks base::mode() as a variable only; calls such as
# mode(x) still resolve to the function.
mode <- as.numeric(names(which(xr == max(xr))))
mode
## [1] 79

Question:2.i Aim: To find the mean value of the given data

# Class mid-points 5, 10, ..., 60.
mid <- seq(from = 5, to = 60, by = 5)
mid
##  [1]  5 10 15 20 25 30 35 40 45 50 55 60
# Frequency recorded for each mid-point, in the same order as `mid`.
f <- c(12, 28, 65, 121, 175, 198, 176, 120, 66, 27, 9, 3)
# Frequency distribution as a two-column data frame.
fr_dist <- data.frame(mid = mid, f = f)
fr_dist
##    mid   f
## 1    5  12
## 2   10  28
## 3   15  65
## 4   20 121
## 5   25 175
## 6   30 198
## 7   35 176
## 8   40 120
## 9   45  66
## 10  50  27
## 11  55   9
## 12  60   3
# Grouped mean: frequency-weighted average of the mid-points.
# The variable is named `mean` because later chunks refer to it by that name.
mean <- sum(fr_dist$mid * fr_dist$f) / sum(fr_dist$f)
mean
## [1] 30.005

Question:2.ii Aim: To find the variance of the given data

# Population variance of the grouped data: frequency-weighted mean of the
# squared deviations of the mid-points from `mean` (the grouped mean computed
# in Question 2.i). The variance itself is printed because it is the stated
# aim; the original printed only its square root.
variance <- sum(fr_dist$f * (fr_dist$mid - mean)^2) / sum(fr_dist$f)
print(variance)
## [1] 100.175
# Standard deviation, kept because the coefficient of variation needs it.
sigma_value <- sqrt(variance)
print(sigma_value)
## [1] 10.00874

Question:2.iii Aim: To find the coefficient of variation

# Coefficient of variation: standard deviation as a percentage of the mean.
cv <- 100 * (sigma_value / mean)
cv
## [1] 33.35692

Result: Mean = 30.005, Variance = 100.175 (Standard Deviation = 10.00874), Coefficient of Variation = 33.36%

Question:3.i Aim: To find the quartile deviation for Sepal.Length parameter in iris dataset

# Working copy of the iris dataset; preview its first six rows.
data <- iris
head(data, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# First and third quartiles of Sepal.Length.
Q1 <- quantile(iris$Sepal.Length, probs = 0.25)
Q3 <- quantile(iris$Sepal.Length, probs = 0.75)
# Quartile deviation (semi-interquartile range). The quantiles carry
# percentage names, so without unname() the result would be printed under
# the label "75%", which is misleading for a derived statistic.
qd <- unname(Q3 - Q1) / 2
qd
## [1] 0.65

Question:3.ii Aim: To find the coefficient of quartile deviation for Petal.Width

# First and third quartiles of Petal.Width.
Q1_pw <- quantile(iris$Petal.Width, probs = 0.25)
Q3_pw <- quantile(iris$Petal.Width, probs = 0.75)
# Coefficient of quartile deviation: (Q3 - Q1) / (Q3 + Q1).
# unname() drops the "75%" label that the ratio would otherwise inherit
# from Q3_pw.
cqd <- unname((Q3_pw - Q1_pw) / (Q3_pw + Q1_pw))
cqd
## [1] 0.7142857

Question:3.iii Aim: To draw a pie chart of Species with a title and a legend

# Number of observations per species.
species_count <- table(iris$Species)
# One colour per species, shared by the slices and the legend keys so the
# two always match.
slice_colours <- rainbow(length(species_count))
pie(
  species_count,
  col = slice_colours,
  main = "Species Distribution in Iris Dataset"
)
legend("topright", legend = names(species_count), fill = slice_colours)

Question:4.i Aim: To find the variance of Freq

# Working copy of the Titanic contingency table; preview it.
data <- Titanic
head(data)
## , , Age = Child, Survived = No
## 
##       Sex
## Class  Male Female
##   1st     0      0
##   2nd     0      0
##   3rd    35     17
##   Crew    0      0
## 
## , , Age = Adult, Survived = No
## 
##       Sex
## Class  Male Female
##   1st   118      4
##   2nd   154     13
##   3rd   387     89
##   Crew  670      3
## 
## , , Age = Child, Survived = Yes
## 
##       Sex
## Class  Male Female
##   1st     5      1
##   2nd    11     13
##   3rd    13     14
##   Crew    0      0
## 
## , , Age = Adult, Survived = Yes
## 
##       Sex
## Class  Male Female
##   1st    57    140
##   2nd    14     80
##   3rd    75     76
##   Crew  192     20
# Flatten the 4-way table into long form: one row per
# Class/Sex/Age/Survived combination, with its count in `Freq`.
titanic_df <- as.data.frame(Titanic)
titanic_df
##    Class    Sex   Age Survived Freq
## 1    1st   Male Child       No    0
## 2    2nd   Male Child       No    0
## 3    3rd   Male Child       No   35
## 4   Crew   Male Child       No    0
## 5    1st Female Child       No    0
## 6    2nd Female Child       No    0
## 7    3rd Female Child       No   17
## 8   Crew Female Child       No    0
## 9    1st   Male Adult       No  118
## 10   2nd   Male Adult       No  154
## 11   3rd   Male Adult       No  387
## 12  Crew   Male Adult       No  670
## 13   1st Female Adult       No    4
## 14   2nd Female Adult       No   13
## 15   3rd Female Adult       No   89
## 16  Crew Female Adult       No    3
## 17   1st   Male Child      Yes    5
## 18   2nd   Male Child      Yes   11
## 19   3rd   Male Child      Yes   13
## 20  Crew   Male Child      Yes    0
## 21   1st Female Child      Yes    1
## 22   2nd Female Child      Yes   13
## 23   3rd Female Child      Yes   14
## 24  Crew Female Child      Yes    0
## 25   1st   Male Adult      Yes   57
## 26   2nd   Male Adult      Yes   14
## 27   3rd   Male Adult      Yes   75
## 28  Crew   Male Adult      Yes  192
## 29   1st Female Adult      Yes  140
## 30   2nd Female Adult      Yes   80
## 31   3rd Female Adult      Yes   76
## 32  Crew Female Adult      Yes   20
# Sample variance (n - 1 denominator, as computed by var()) of the counts.
variance_freq <- var(titanic_df[["Freq"]])
variance_freq
## [1] 18494.89

Question:4.ii Aim: To find the 5th decile of Freq

# The 5th decile is the 50% quantile (the median) of the counts.
decile_5 <- quantile(titanic_df[["Freq"]], probs = 5 / 10)
decile_5
##  50% 
## 13.5

Question:4.iii Aim: To find the 29th percentile of Freq

# 29th percentile of the counts, using quantile()'s default interpolation.
freq_counts <- titanic_df[["Freq"]]
percentile_29 <- quantile(freq_counts, probs = 0.29)
percentile_29
##  29% 
## 2.98

Result: 5th decile=13.5, 29th percentile=2.98