For this assignment, I chose the InsectSprays dataset for analysis.
I read it into R directly from the GitHub link.
##filepath <-"C:/Users/Mezue/Documents/R_codes/R_codes/Insect_Spray.csv"
# Location of the InsectSprays CSV in the Rdatasets GitHub repository.
# The URL is assembled from short pieces so that no line break ever ends up
# inside the quoted string (a newline inside the quotes would become part of
# the URL itself).
filepath <- paste0(
  "https://raw.github.com/vincentarelbundock/Rdatasets/",
  "master/csv/datasets/InsectSprays.csv"
)
# Read the CSV into a data frame. `header` is spelled out in full rather than
# relying on partial argument matching, and text columns are kept as character
# vectors instead of being converted to factors.
Insect_Df <- read.csv(
  file = filepath,
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
# Preview the first rows of the loaded data.
head(Insect_Df)
## X count spray
## 1 1 10 A
## 2 2 7 A
## 3 3 20 A
## 4 4 14 A
## 5 5 14 A
## 6 6 12 A
Bonus question: reading the CSV file directly from the GitHub link.
# Bonus: read the same CSV straight from GitHub with read.table().
# The URL is assembled from short pieces so that no line break ever ends up
# inside the quoted string (a newline inside the quotes would become part of
# the URL itself).
filepath <- paste0(
  "https://raw.github.com/vincentarelbundock/Rdatasets/",
  "master/csv/datasets/InsectSprays.csv"
)
# read.table() needs the header flag and the comma separator stated
# explicitly; text columns are kept as character vectors, not factors.
InsectSpray_Df <- read.table(
  file = filepath,
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
View the data and review its summary statistics.
# Preview the first six rows of the full data set.
head(Insect_Df, n = 6L)
## X count spray
## 1 1 10 A
## 2 2 7 A
## 3 3 20 A
## 4 4 14 A
## 5 5 14 A
## 6 6 12 A
# Per-column summary statistics for the full data set.
summary(object = Insect_Df)
## X count spray
## Min. : 1.00 Min. : 0.00 Length:72
## 1st Qu.:18.75 1st Qu.: 3.00 Class :character
## Median :36.50 Median : 7.00 Mode :character
## Mean :36.50 Mean : 9.50
## 3rd Qu.:54.25 3rd Qu.:14.25
## Max. :72.00 Max. :26.00
Calculate the mean and median of the first two attributes (X and count).
# Mean and median of attribute X (taken by position: the first column)
mean(Insect_Df[[1]])
## [1] 36.5
median(Insect_Df[[1]])
## [1] 36.5
# Mean and median of attribute count
mean(Insect_Df[["count"]])
## [1] 9.5
median(Insect_Df[["count"]])
## [1] 7
Create a new data frame with a subset of the columns and rows.
# Build a new data frame from the first 50 rows and the first 3 columns
Insect_Sprays_DF <- Insect_Df[seq_len(50), seq_len(3)]
Create new column names for the new data frame.
# Replace the column names of the subset with descriptive ones
Insect_Sprays_DF <- setNames(
  Insect_Sprays_DF,
  c("Row_Number", "Dead_Insects_Count", "Spray_type")
)
Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.
# Per-column summary statistics for the renamed subset.
summary(object = Insect_Sprays_DF)
## Row_Number Dead_Insects_Count Spray_type
## Min. : 1.00 Min. : 0 Length:50
## 1st Qu.:13.25 1st Qu.: 3 Class :character
## Median :25.50 Median : 7 Mode :character
## Mean :25.50 Mean : 9
## 3rd Qu.:37.75 3rd Qu.:14
## Max. :50.00 Max. :23
# Mean and median of attribute Row_Number
mean(Insect_Sprays_DF[["Row_Number"]])
## [1] 25.5
median(Insect_Sprays_DF[["Row_Number"]])
## [1] 25.5
# Mean and median of attribute Dead_Insects_Count
mean(Insect_Sprays_DF[["Dead_Insects_Count"]])
## [1] 9
median(Insect_Sprays_DF[["Dead_Insects_Count"]])
## [1] 7
For at least 3 distinct values in a column, rename the value so that every occurrence of it in that column is renamed.
# Map each single-letter spray code to its descriptive label. Only the codes
# listed here are relabelled; any other value in the column is left unchanged.
spray_labels <- c(
  A = "Avid_Spray",
  B = "Brandt_Spray",
  C = "Cimi_Spray"
)
# Relabel one code at a time, in the order A, B, C.
for (spray_code in names(spray_labels)) {
  is_match <- Insect_Sprays_DF$Spray_type == spray_code
  Insect_Sprays_DF[is_match, "Spray_type"] <- spray_labels[[spray_code]]
}
Display enough rows to see examples of all of steps 1-5 above.
# Show the first 30 rows of the renamed subset.
head(Insect_Sprays_DF, n = 30L)
## Row_Number Dead_Insects_Count Spray_type
## 1 1 10 Avid_Spray
## 2 2 7 Avid_Spray
## 3 3 20 Avid_Spray
## 4 4 14 Avid_Spray
## 5 5 14 Avid_Spray
## 6 6 12 Avid_Spray
## 7 7 10 Avid_Spray
## 8 8 23 Avid_Spray
## 9 9 17 Avid_Spray
## 10 10 20 Avid_Spray
## 11 11 14 Avid_Spray
## 12 12 13 Avid_Spray
## 13 13 11 Brandt_Spray
## 14 14 17 Brandt_Spray
## 15 15 21 Brandt_Spray
## 16 16 11 Brandt_Spray
## 17 17 16 Brandt_Spray
## 18 18 14 Brandt_Spray
## 19 19 17 Brandt_Spray
## 20 20 17 Brandt_Spray
## 21 21 19 Brandt_Spray
## 22 22 21 Brandt_Spray
## 23 23 7 Brandt_Spray
## 24 24 13 Brandt_Spray
## 25 25 0 Cimi_Spray
## 26 26 1 Cimi_Spray
## 27 27 7 Cimi_Spray
## 28 28 2 Cimi_Spray
## 29 29 3 Cimi_Spray
## 30 30 1 Cimi_Spray