Name: Apurva Vilas Bhalerao Roll No: 42 Division:CS A —

Q1 Using the given dataset solve the following questions. Dataset Used: Hair Eye Color Data Set

# Preview the hair/eye colour dataset (CSV file): read it and print every row.

hair_eye_csv <- "C:/Users/Apurva Bhalerao/OneDrive/Desktop/R lab session/hair_eye_color.csv"
read.csv(file = hair_eye_csv)
##    Person.No. Hair.Color Eye.Color
## 1           1     Blonde      Blue
## 2           2     Blonde      Blue
## 3           3     Blonde     Green
## 4           4     Blonde     Brown
## 5           5     Blonde     Brown
## 6           6     Blonde     Black
## 7           7        Red      Blue
## 8           8        Red     Green
## 9           9        Red     Brown
## 10         10        Red     Brown
## 11         11      Brown      Blue
## 12         12      Brown     Brown
## 13         13      Brown     Brown
## 14         14      Brown     Brown
## 15         15      Brown     Brown
## 16         16      Brown     Black
## 17         17      Brown     Black
## 18         18      Black      Blue
## 19         19      Black     Brown
## 20         20      Black     Brown
# Load the hair/eye colour data into `df1`; the first CSV row holds the
# column names (read.csv's default, spelled out here).
df1 <- read.csv(
  file = "C:/Users/Apurva Bhalerao/OneDrive/Desktop/R lab session/hair_eye_color.csv",
  header = TRUE
)
  1. How many people have Brown eye color?
# Count the rows whose eye colour is recorded as "Brown"
# (summing a logical vector counts its TRUE entries).
sum(df1[["Eye.Color"]] == "Brown")
## [1] 10
  1. How many people have Blonde hair?
# Count the rows whose hair colour is recorded as "Blonde".
sum(df1[["Hair.Color"]] == "Blonde")
## [1] 6
  1. How many Brown haired people have Black eyes?
# Keep only the brown-haired people, then count how many of them have
# black eyes.
a1 <- subset(df1, subset = Hair.Color == "Brown")
sum(a1[["Eye.Color"]] == "Black")
## [1] 2
  1. What is the percentage of people with Green eyes?
# Percentage of people with green eyes: matching rows over all rows, times 100.
green <- sum(df1[["Eye.Color"]] == "Green")
allColors <- length(df1[["Eye.Color"]])
percent <- green / allColors * 100
percent
## [1] 10
  1. What percentage of people have red hair and Blue eyes?
# Percentage of people who have both red hair and blue eyes.
# Rows where the combined condition is NA are not counted, matching how
# subset() drops them.
red <- sum(
  with(df1, Hair.Color == "Red" & Eye.Color == "Blue"),
  na.rm = TRUE
)
percentage <- red * 100 / nrow(df1)
percentage
## [1] 5

Q2 Using the given dataset solve the following questions. Dataset Used: Germination Dataset

  1. What is the average number of seeds germinated for the uncovered boxes with level of watering equal to 4?
#Import germination data(excel file)

library(readxl)
# Load the germination workbook into `df2`.
df2 <- read_excel(
  path = "C:/Users/Apurva Bhalerao/OneDrive/Desktop/R lab session/germination.xlsx"
)

# Rows for uncovered boxes watered at level 4; which() keeps only the rows
# where the condition is TRUE (NA and FALSE are both dropped).
is_uncovered_level4 <- df2$Box == "Uncovered" & df2$water_amt == 4
a2 <- df2[which(is_uncovered_level4), ]

# Average number of germinated seeds in that group.
mean(a2$germinated)
## [1] 78
  1. What is the median number of seeds germinated for the covered boxes?
# Median number of germinated seeds across the covered boxes.
# subset() resolves `select = germinated` against df2's own column names,
# so no separate copy of the column has to be created beforehand.
covered <- subset(df2, Box == "Covered", select = germinated)

median(covered$germinated)
## [1] 45
  1. Establish conclusions on the basis of available data:
  1. Association of levels of watering with the number of germinating seeds in case of covered boxes as well as uncovered boxes.
# Two panels side by side: one scatter plot per box type.
par(mfrow = c(1, 2))

# Split the germination data by whether the box was covered.
Covered <- subset(df2, subset = Box == "Covered")
Uncovered <- subset(df2, subset = Box == "Uncovered")

# Seeds germinated against water amount, covered boxes.
plot(
  x = Covered[["water_amt"]],
  y = Covered[["germinated"]],
  col = "Green",
  pch = 16,
  xlab = "Water amount",
  ylab = "Seeds germinated",
  main = "For Covered Box"
)

# Seeds germinated against water amount, uncovered boxes.
plot(
  x = Uncovered[["water_amt"]],
  y = Uncovered[["germinated"]],
  col = "Green",
  pch = 16,
  xlab = "Water amount",
  ylab = "Seeds germinated",
  main = "For Uncovered Box"
)

#INFERENCES
#From the scatterplots we can infer that for water amounts up to 3, more seeds germinated in the covered boxes; as the water amount increases beyond that, more seeds germinated in the uncovered boxes.
  1. Association of number of germinating seeds with the fact that the boxes were covered or uncovered.
# Side-by-side histograms of seeds germinated: covered boxes on the left,
# uncovered boxes on the right.
par(mfrow = c(1, 2))

# `labels = TRUE` prints each bar's count above it. The argument name is
# written in full and TRUE is used instead of T, so the call does not rely
# on partial argument matching or on the reassignable shorthand.
hist(Covered$germinated, labels = TRUE)
hist(Uncovered$germinated, labels = TRUE)

# Return to a single-panel layout so later plots fill the whole device.
par(mfrow = c(1, 1))

#INFERENCES
#From the histograms we can infer that more seeds germinated in the uncovered boxes than in the covered boxes.

Q3 Using the given dataset solve the following questions. Dataset Used: Iris Dataset

  1. Create a boxplot of sepal length
# Work on a copy of the built-in iris data.
df3 <- iris

# Box-and-whisker plot of the sepal length column.
boxplot(x = df3[["Sepal.Length"]])

  1. Create a scatter plot of sepal length vs. petal length
# Scatter plot with sepal length on the x-axis and petal length on the y-axis.
plot(
  x = df3[["Sepal.Length"]],
  y = df3[["Petal.Length"]],
  col = "Blue",
  pch = 16,
  xlab = "Sepal Length",
  ylab = "Petal Length",
  main = "Sepal Length VS Petal Length"
)