Name: Apurva Vilas Bhalerao Roll No: 42 Division:CS A —
Q1 Using the given dataset solve the following questions. Dataset Used: Hair Eye Color Data Set
# Import the hair/eye colour dataset (CSV file).
# The file is read once and kept in df1; evaluating df1 at top level prints it,
# so the listing below is the same as printing the result of read.csv() directly.
df1 <- read.csv("C:/Users/Apurva Bhalerao/OneDrive/Desktop/R lab session/hair_eye_color.csv")
df1
## Person.No. Hair.Color Eye.Color
## 1 1 Blonde Blue
## 2 2 Blonde Blue
## 3 3 Blonde Green
## 4 4 Blonde Brown
## 5 5 Blonde Brown
## 6 6 Blonde Black
## 7 7 Red Blue
## 8 8 Red Green
## 9 9 Red Brown
## 10 10 Red Brown
## 11 11 Brown Blue
## 12 12 Brown Brown
## 13 13 Brown Brown
## 14 14 Brown Brown
## 15 15 Brown Brown
## 16 16 Brown Black
## 17 17 Brown Black
## 18 18 Black Blue
## 19 19 Black Brown
## 20 20 Black Brown

# Number of people with brown eyes.
sum(df1$Eye.Color == "Brown")
## [1] 10

# Number of people with blonde hair.
sum(df1$Hair.Color == "Blonde")
## [1] 6

# Number of brown-haired people who have black eyes:
# keep only the brown-haired rows, then count black eyes among them.
a1 <- subset(df1, Hair.Color == "Brown")
sum(a1$Eye.Color == "Black")
## [1] 2

# Percentage of all people who have green eyes.
# allColors is the total number of rows (one row per person).
green <- sum(df1$Eye.Color == "Green")
allColors <- nrow(df1)
percent <- (green / allColors) * 100
percent
## [1] 10

# Percentage of all people who have both red hair and blue eyes.
# Note: `red` holds the count of red-haired, blue-eyed people.
red <- nrow(subset(df1, Hair.Color == "Red" & Eye.Color == "Blue"))
percentage <- red * 100 / nrow(df1)
percentage
## [1] 5
Q2. Using the given dataset, solve the following questions. Dataset used: Germination data set
# Import the germination data (Excel file).
library(readxl)
df2 <- read_excel("C:/Users/Apurva Bhalerao/OneDrive/Desktop/R lab session/germination.xlsx")

# Mean of the germinated column for uncovered boxes with a water amount of 4.
a2 <- subset(df2, Box == "Uncovered" & water_amt == 4)
mean(a2$germinated)
## [1] 78

# Standalone copy of the germinated column.
# NOTE(review): none of the statements below in this section read this vector;
# it is kept in case code elsewhere in the file relies on it.
germinated <- df2$germinated

# Median of the germinated column for covered boxes.
# In subset(), `select = germinated` refers to the column of df2 by name.
covered <- subset(df2, Box == "Covered", select = germinated)
median(covered$germinated)
## [1] 45
# Scatterplots of seeds germinated against water amount, drawn side by side
# (1 row x 2 columns): covered boxes on the left, uncovered on the right.
par(mfrow = c(1, 2))
Covered <- subset(df2, Box == "Covered")
Uncovered <- subset(df2, Box == "Uncovered")
plot(
  Covered$water_amt, Covered$germinated,
  col = "Green", pch = 16,
  xlab = "Water amount", ylab = "Seeds germinated",
  main = "For Covered Box"
)
plot(
  Uncovered$water_amt, Uncovered$germinated,
  col = "Green", pch = 16,
  xlab = "Water amount", ylab = "Seeds germinated",
  main = "For Uncovered Box"
)
# INFERENCES
# From the scatterplots we can infer that while the water amount was increased
# up to 3, more seeds germinated in the covered boxes; as the water amount is
# increased further, more seeds germinated in the uncovered boxes.

# Histograms of seeds germinated, again side by side, with the count printed
# above each bar (`labels = TRUE`, spelled out in full rather than `label=T`).
par(mfrow = c(1, 2))
hist(Covered$germinated, labels = TRUE)
hist(Uncovered$germinated, labels = TRUE)
# Return to a single-plot layout for any plots drawn afterwards.
par(mfrow = c(1, 1))
# INFERENCES
# From the histograms we can infer that a greater number of seeds germinated
# in the uncovered boxes than in the covered boxes.
Q3. Using the given dataset, solve the following questions. Dataset used: Iris data set
# Work on a copy of the built-in iris dataset.
df3 <- iris

# Box plot of sepal length.
boxplot(df3[["Sepal.Length"]])

# Scatterplot of petal length against sepal length (solid blue points).
with(
  df3,
  plot(
    x = Sepal.Length,
    y = Petal.Length,
    col = "Blue",
    pch = 16,
    xlab = "Sepal Length",
    ylab = "Petal Length",
    main = "Sepal Length VS Petal Length"
  )
)