Aim

To perform a baseline statistical analysis on a dataset and report a summary of its descriptive statistics.

Algorithm

R-code

Loading data

# Read the marks dataset directly from the course repository (first row = header).
df <- read.csv(
  "https://raw.githubusercontent.com/sijuswamy/StatLab/main/Dataset_1.csv",
  header = TRUE
)
# Treat Gender as a categorical variable so it can be used for grouping.
df[["Gender"]] <- as.factor(df[["Gender"]])

# Preview the first rows of the data.
head(df)
##          Student_Name Gender X20IMCAT201 X20IMCAT203 X20IMCAT205 X20IMCAT207
## 1 ABEL MATHEW ABRAHAM      M         1.5         1.5          18           1
## 2         ABEN B JOHN      M         8.5         6.0          26           5
## 3           ABIN SAJI      M         0.0         0.0          21           0
## 4       ADWAITH SANIL      M         8.0        13.0          18           4
## 5         AKSHAY BABU      M         8.0         4.0          14          10
## 6         ALEN T BINU      M        29.5        24.0          43          19
##   X20IMCAT209
## 1          16
## 2          23
## 3           0
## 4          17
## 5           5
## 6          35
# Show the structure of df: number of observations, column names and column types.
str(df)
## 'data.frame':    56 obs. of  7 variables:
##  $ Student_Name: chr  "ABEL MATHEW ABRAHAM" "ABEN B JOHN" "ABIN SAJI" "ADWAITH SANIL" ...
##  $ Gender      : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 1 1 2 ...
##  $ X20IMCAT201 : num  1.5 8.5 0 8 8 29.5 16.5 21 25.5 30 ...
##  $ X20IMCAT203 : num  1.5 6 0 13 4 24 8.5 15 11.5 32.5 ...
##  $ X20IMCAT205 : num  18 26 21 18 14 43 25 22 25 32 ...
##  $ X20IMCAT207 : num  1 5 0 4 10 19 10 11 1 12 ...
##  $ X20IMCAT209 : num  16 23 0 17 5 35 12 17 30 37 ...

Finding column sums and averages

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the subject mark columns (drop the identifier and grouping columns).
df1 <- select(df, -c(Student_Name, Gender))
# Per-subject total and mean marks across all students.
Sub_total <- colSums(df1)
Sub_average <- colMeans(df1)
# Report the averages to two decimal places.
round(Sub_average, digits = 2)
## X20IMCAT201 X20IMCAT203 X20IMCAT205 X20IMCAT207 X20IMCAT209 
##       20.54       12.17       26.68       11.29       24.68
library(ggplot2)

# Box plots of each subject's marks split by Gender.
# `crop` is reassigned for every subject and printed immediately afterwards.
crop <- ggplot(df, aes(x = Gender, y = X20IMCAT201)) +
  geom_boxplot() +
  labs(x = "Gender", y = "Computer Organization marks")
crop

crop <- ggplot(df, aes(x = Gender, y = X20IMCAT203)) +
  geom_boxplot() +
  labs(x = "Gender", y = "Mathematics marks")
crop

crop <- ggplot(df, aes(x = Gender, y = X20IMCAT205)) +
  geom_boxplot() +
  labs(x = "Gender", y = "OOPS marks")
crop

crop <- ggplot(df, aes(x = Gender, y = X20IMCAT207)) +
  geom_boxplot() +
  labs(x = "Gender", y = "Accountancy marks")
crop

crop <- ggplot(df, aes(x = Gender, y = X20IMCAT209)) +
  geom_boxplot() +
  labs(x = "Gender", y = "DS marks")
crop

# Histograms of each subject's marks (bin width 5) with bars filled by Gender.
# The fill is mapped inside aes() using the bare column name and is deliberately
# NOT also set as a constant in geom_histogram(): a constant fill overrides the
# mapped aesthetic, which would hide the Gender colours and leave the legend
# requested by theme(legend.position = "top") with nothing to show.
# The red outline is a fixed (unmapped) colour and is kept as before.
ggplot(data = df, aes(x = X20IMCAT201, fill = Gender)) +
  geom_histogram(binwidth = 5, color = "red") +
  theme(legend.position = "top") # +facet_grid(~Gender)

ggplot(data = df, aes(x = X20IMCAT203, fill = Gender)) +
  geom_histogram(binwidth = 5, color = "red") +
  theme(legend.position = "top") # +facet_grid(~Gender)

ggplot(data = df, aes(x = X20IMCAT205, fill = Gender)) +
  geom_histogram(binwidth = 5, color = "red") +
  theme(legend.position = "top") # +facet_grid(~Gender)

ggplot(data = df, aes(x = X20IMCAT207, fill = Gender)) +
  geom_histogram(binwidth = 5, color = "red") +
  theme(legend.position = "top") # +facet_grid(~Gender)

ggplot(data = df, aes(x = X20IMCAT209, fill = Gender)) +
  geom_histogram(binwidth = 5, color = "red") +
  theme(legend.position = "top") # +facet_grid(~Gender)

# Kernel density estimate of each subject's marks, drawn with base graphics
# (one figure per subject).
# NOTE(review): the default figure title appears to be taken from the text of
# the density() call, so rewriting the `df$...` expressions would change the
# rendered titles — confirm before refactoring.
plot(density(df$X20IMCAT201))

plot(density(df$X20IMCAT203))

plot(density(df$X20IMCAT205))

plot(density(df$X20IMCAT207))

plot(density(df$X20IMCAT209))

# Median of the X20IMCAT201 marks.
median(df$X20IMCAT201)
## [1] 21
# Mode of the same column using DescTools::Mode(); the result carries a "freq"
# attribute with the number of occurrences of the modal value.
library(DescTools)
Mode(df$X20IMCAT201)
## [1] 21
## attr(,"freq")
## [1] 4

User-defined function

# Variance of the marks in each subject column, one call per subject.
var(df$X20IMCAT201)
## [1] 81.95706
var(df$X20IMCAT203)
## [1] 65.95706
var(df$X20IMCAT205)
## [1] 102.113
var(df$X20IMCAT207)
## [1] 60.75325
var(df$X20IMCAT209)
## [1] 100.6948
# Correlation between the X20IMCAT203 and X20IMCAT207 marks (cor() defaults).
cor(df$X20IMCAT203,df$X20IMCAT207)
## [1] 0.3856813
# Return the most frequent value of `a`.
# When several values share the highest frequency, the one that appears first
# in `a` is returned.
calcmode <- function(a) {
  # Distinct values, in order of first appearance.
  distinct_values <- unique(a)
  # Map every element to the index of its distinct value and count per index.
  occurrences <- tabulate(match(a, distinct_values))
  # which.max() picks the first maximum, which gives the tie-breaking rule above.
  distinct_values[which.max(occurrences)]
}
# Mode of the X20IMCAT201 marks computed with the user-defined calcmode().
calcmode(df$X20IMCAT201)
## [1] 21
# Standard deviation of the same column.
sd(df$X20IMCAT201)
## [1] 9.053014

Results and discussions

Various statistical analyses were performed on a secondary dataset, and appropriate visualizations were used to interpret the statistical estimates.