# Load the calcium data set from a comma-separated file in the working
# directory; the first line of the file supplies the column names.
calcium <- read.table("calcium.csv", sep = ",", header = TRUE)
# Q1: Use the summary function to gain an overview of the data set.
# Then display the mean and median for at least two attributes.
summary(calcium)
## X time cal
## Min. : 1.0 Min. : 0.450 Min. :-0.00438
## 1st Qu.: 7.5 1st Qu.: 2.400 1st Qu.: 1.51317
## Median :14.0 Median : 6.100 Median : 3.00782
## Mean :14.0 Mean : 6.844 Mean : 2.68755
## 3rd Qu.:20.5 3rd Qu.:11.150 3rd Qu.: 3.52067
## Max. :27.0 Max. :15.000 Max. : 5.13825
# Attribute 1: cal
mean(calcium$cal)
## [1] 2.687554
median(calcium$cal)
## [1] 3.00782
# Attribute 2: time (the median must be taken on time as well, not on cal
# a second time)
mean(calcium$time)
## [1] 6.844444
median(calcium$time)
## [1] 6.1
# Q2: Create a new data frame with a subset of the columns and rows. Make sure to rename it.
# All columns are kept; only rows where both time and cal are greater than 1
# are retained. which() discards comparisons that evaluate to NA, and
# drop = FALSE guarantees the result is still a data frame.
theDF <- calcium[which(calcium$time > 1 & calcium$cal > 1), , drop = FALSE]
summary(theDF)
## X time cal
## Min. : 4.00 Min. : 1.30 Min. :1.173
## 1st Qu.:11.25 1st Qu.: 4.00 1st Qu.:2.625
## Median :16.50 Median : 8.05 Median :3.151
## Mean :16.41 Mean : 8.22 Mean :3.173
## 3rd Qu.:21.75 3rd Qu.:12.65 3rd Qu.:3.858
## Max. :27.00 Max. :15.00 Max. :5.138
print(theDF)
## X time cal
## 4 4 1.30 1.77967
## 7 7 2.40 1.75136
## 8 8 2.40 1.27497
## 9 9 2.40 1.17332
## 10 10 4.00 3.12273
## 11 11 4.00 2.60958
## 12 12 4.00 2.57429
## 13 13 6.10 3.17881
## 14 14 6.10 3.00782
## 15 15 6.10 2.67061
## 16 16 8.05 3.05959
## 17 17 8.05 3.94321
## 18 18 8.05 3.43726
## 19 19 11.15 4.80735
## 20 20 11.15 3.35583
## 21 21 11.15 2.78309
## 22 22 13.15 5.13825
## 23 23 13.15 4.70274
## 24 24 13.15 4.25702
## 25 25 15.00 3.60407
## 26 26 15.00 4.15029
## 27 27 15.00 3.42484
# Confirm the filtered object is still a data frame.
class(theDF)
## [1] "data.frame"
# Q3: Create new column names for the new data frame.
# Rename the first three columns in a single assignment, then list the
# resulting names to verify.
colnames(theDF)[1:3] <- c("Updated Name2", "Updated Time2", "Updated Cal2")
colnames(theDF)
## [1] "Updated Name2" "Updated Time2" "Updated Cal2"
# Q4: Use the summary function to create an overview of your new data frame.
# Then print the mean and median for the same two attributes.
# Please compare.
summary(theDF)
## Updated Name2 Updated Time2 Updated Cal2
## Min. : 4.00 Min. : 1.30 Min. :1.173
## 1st Qu.:11.25 1st Qu.: 4.00 1st Qu.:2.625
## Median :16.50 Median : 8.05 Median :3.151
## Mean :16.41 Mean : 8.22 Mean :3.173
## 3rd Qu.:21.75 3rd Qu.:12.65 3rd Qu.:3.858
## Max. :27.00 Max. :15.00 Max. :5.138
# The two attributes summarised in Q1 were time and cal, so the same two
# (now named `Updated Time2` and `Updated Cal2`) are used here. The first
# column is only a row index, so its mean/median carry no meaning.
mean(theDF$`Updated Time2`)
## [1] 8.220455
median(theDF$`Updated Time2`)
## [1] 8.05
mean(theDF$`Updated Cal2`)
## [1] 3.173032
median(theDF$`Updated Cal2`)
## [1] 3.15077
# Comparison with the full data set: dropping the rows with time <= 1 or
# cal <= 1 raises both attributes (time: mean 6.844 -> 8.220, median
# 6.1 -> 8.05; cal: mean 2.688 -> 3.173, median 3.008 -> 3.151).
theDF
## Updated Name2 Updated Time2 Updated Cal2
## 4 4 1.30 1.77967
## 7 7 2.40 1.75136
## 8 8 2.40 1.27497
## 9 9 2.40 1.17332
## 10 10 4.00 3.12273
## 11 11 4.00 2.60958
## 12 12 4.00 2.57429
## 13 13 6.10 3.17881
## 14 14 6.10 3.00782
## 15 15 6.10 2.67061
## 16 16 8.05 3.05959
## 17 17 8.05 3.94321
## 18 18 8.05 3.43726
## 19 19 11.15 4.80735
## 20 20 11.15 3.35583
## 21 21 11.15 2.78309
## 22 22 13.15 5.13825
## 23 23 13.15 4.70274
## 24 24 13.15 4.25702
## 25 25 15.00 3.60407
## 26 26 15.00 4.15029
## 27 27 15.00 3.42484
#q5: For at least 3 values in a column please rename so that every value in that column is renamed.
#For example, suppose I have 20 values of the letter "e" in one column.
#Rename those values so that all 20 would show as "excellent".
# Add a categorical Remarks column to answer the question: rows whose
# `Updated Cal2` is at least 3 are labelled "Good", all others "Bad".
# The factor levels must contain every label that is assigned; assigning a
# label that is not a declared level yields NA with an "invalid factor level"
# warning. Building the column in one vectorised step also makes the code
# safe to re-run (the column is overwritten rather than appended again).
theDF$Remarks <- factor(
  ifelse(theDF$`Updated Cal2` >= 3, "Good", "Bad"),
  levels = c("Good", "Bad")
)
summary(theDF)
## Updated Name2 Updated Time2 Updated Cal2 Remarks
## Min. : 4.00 Min. : 1.30 Min. :1.173 Good:14
## 1st Qu.:11.25 1st Qu.: 4.00 1st Qu.:2.625 Bad : 8
## Median :16.50 Median : 8.05 Median :3.151
## Mean :16.41 Mean : 8.22 Mean :3.173
## 3rd Qu.:21.75 3rd Qu.:12.65 3rd Qu.:3.858
## Max. :27.00 Max. :15.00 Max. :5.138
#q6: Display enough rows to see examples of all of steps 1-5 above.
# head() returns at most 15 rows and never pads with NA rows when the data
# frame is shorter than requested.
head(theDF, 15)
## Updated Name2 Updated Time2 Updated Cal2 Remarks
## 4 4 1.30 1.77967 Bad
## 7 7 2.40 1.75136 Bad
## 8 8 2.40 1.27497 Bad
## 9 9 2.40 1.17332 Bad
## 10 10 4.00 3.12273 Good
## 11 11 4.00 2.60958 Bad
## 12 12 4.00 2.57429 Bad
## 13 13 6.10 3.17881 Good
## 14 14 6.10 3.00782 Good
## 15 15 6.10 2.67061 Bad
## 16 16 8.05 3.05959 Good
## 17 17 8.05 3.94321 Good
## 18 18 8.05 3.43726 Good
## 19 19 11.15 4.80735 Good
## 20 20 11.15 3.35583 Good