# Load the data set from a comma-separated file in the working directory;
# the first line of the file supplies the column names.
calcium <- read.table("calcium.csv", header = TRUE, sep = ",")
#Q1: Use the summary function to gain an overview of the data set. 
#Then display the mean and median for at least two attributes.
# Overview of every column in the full data set.
summary(calcium)
##        X             time             cal          
##  Min.   : 1.0   Min.   : 0.450   Min.   :-0.00438  
##  1st Qu.: 7.5   1st Qu.: 2.400   1st Qu.: 1.51317  
##  Median :14.0   Median : 6.100   Median : 3.00782  
##  Mean   :14.0   Mean   : 6.844   Mean   : 2.68755  
##  3rd Qu.:20.5   3rd Qu.:11.150   3rd Qu.: 3.52067  
##  Max.   :27.0   Max.   :15.000   Max.   : 5.13825
# Mean and median of the cal column.
mean(calcium$cal)
## [1] 2.687554
median(calcium$cal)
## [1] 3.00782
# Mean and median of the time column.
mean(calcium$time)
## [1] 6.844444
median(calcium$time)
## [1] 6.1
#Q2: Create a new data frame with a subset of the columns and rows. Make sure to rename it. 
#Select Cal and time and their values for 8 rows, rename columns, sort the subsets and rename rows
# Keep only the rows in which both time and cal are greater than 1.
# Inside subset() the column names are looked up in the data frame itself.
theDF <- subset(calcium, subset = time > 1 & cal > 1)
# Overview of the filtered data frame.
summary(theDF)
##        X              time            cal       
##  Min.   : 4.00   Min.   : 1.30   Min.   :1.173  
##  1st Qu.:11.25   1st Qu.: 4.00   1st Qu.:2.625  
##  Median :16.50   Median : 8.05   Median :3.151  
##  Mean   :16.41   Mean   : 8.22   Mean   :3.173  
##  3rd Qu.:21.75   3rd Qu.:12.65   3rd Qu.:3.858  
##  Max.   :27.00   Max.   :15.00   Max.   :5.138
# Show every remaining row; the original row labels are retained.
print(theDF)
##     X  time     cal
## 4   4  1.30 1.77967
## 7   7  2.40 1.75136
## 8   8  2.40 1.27497
## 9   9  2.40 1.17332
## 10 10  4.00 3.12273
## 11 11  4.00 2.60958
## 12 12  4.00 2.57429
## 13 13  6.10 3.17881
## 14 14  6.10 3.00782
## 15 15  6.10 2.67061
## 16 16  8.05 3.05959
## 17 17  8.05 3.94321
## 18 18  8.05 3.43726
## 19 19 11.15 4.80735
## 20 20 11.15 3.35583
## 21 21 11.15 2.78309
## 22 22 13.15 5.13825
## 23 23 13.15 4.70274
## 24 24 13.15 4.25702
## 25 25 15.00 3.60407
## 26 26 15.00 4.15029
## 27 27 15.00 3.42484
# Confirm that the result is still a data frame.
class(theDF)
## [1] "data.frame"
# Q3: Create new column names for the new data frame.
# The first three columns are renamed in one vectorised assignment.
colnames(theDF)[1:3] <- c("Updated Name2", "Updated Time2", "Updated Cal2")
colnames(theDF)
## [1] "Updated Name2" "Updated Time2" "Updated Cal2"
# Q4: Use the summary function to create an overview of your new data frame.
# Then print the mean and median for the same two attributes.
# Please compare.
# Overview of the renamed, filtered data frame.
summary(theDF)
##  Updated Name2   Updated Time2    Updated Cal2  
##  Min.   : 4.00   Min.   : 1.30   Min.   :1.173  
##  1st Qu.:11.25   1st Qu.: 4.00   1st Qu.:2.625  
##  Median :16.50   Median : 8.05   Median :3.151  
##  Mean   :16.41   Mean   : 8.22   Mean   :3.173  
##  3rd Qu.:21.75   3rd Qu.:12.65   3rd Qu.:3.858  
##  Max.   :27.00   Max.   :15.00   Max.   :5.138
# Mean and median of the same two attributes used in Q1 (time and cal), so
# that the figures can be compared with those of the full data set.
mean(theDF$`Updated Time2`)
## [1] 8.220455
median(theDF$`Updated Time2`)
## [1] 8.05
mean(theDF$`Updated Cal2`)
## [1] 3.173032
median(theDF$`Updated Cal2`)
## [1] 3.15077
# Comparison with the full data set (see the first summary): dropping the rows
# with time <= 1 or cal <= 1 raises the time mean from 6.844 to 8.220 and its
# median from 6.10 to 8.05, and raises the cal mean from 2.688 to 3.173 and
# its median from 3.008 to 3.151.
theDF
##    Updated Name2 Updated Time2 Updated Cal2
## 4              4          1.30      1.77967
## 7              7          2.40      1.75136
## 8              8          2.40      1.27497
## 9              9          2.40      1.17332
## 10            10          4.00      3.12273
## 11            11          4.00      2.60958
## 12            12          4.00      2.57429
## 13            13          6.10      3.17881
## 14            14          6.10      3.00782
## 15            15          6.10      2.67061
## 16            16          8.05      3.05959
## 17            17          8.05      3.94321
## 18            18          8.05      3.43726
## 19            19         11.15      4.80735
## 20            20         11.15      3.35583
## 21            21         11.15      2.78309
## 22            22         13.15      5.13825
## 23            23         13.15      4.70274
## 24            24         13.15      4.25702
## 25            25         15.00      3.60407
## 26            26         15.00      4.15029
## 27            27         15.00      3.42484
#q5: For at least 3 values in a column please rename so that every value in that column is renamed. 
#For example, suppose I have 20 values of the letter “e” in one column. 
#Rename those values so that all 20 would show as “excellent”.

# Add a categorical Remarks column to answer the question.
# Each row is labelled from its `Updated Cal2` value: "Good" when the value is
# at least 3 and "Bad" otherwise. The factor levels are exactly the labels
# being assigned; assigning a label that is not a declared level produces NA
# together with an "invalid factor level" warning.
theDF$Remarks <- factor(
  ifelse(theDF$`Updated Cal2` >= 3, "Good", "Bad"),
  levels = c("Good", "Bad")
)
summary(theDF)
##  Updated Name2   Updated Time2    Updated Cal2   Remarks 
##  Min.   : 4.00   Min.   : 1.30   Min.   :1.173   Good:14  
##  1st Qu.:11.25   1st Qu.: 4.00   1st Qu.:2.625   Bad : 8  
##  Median :16.50   Median : 8.05   Median :3.151            
##  Mean   :16.41   Mean   : 8.22   Mean   :3.173            
##  3rd Qu.:21.75   3rd Qu.:12.65   3rd Qu.:3.858            
##  Max.   :27.00   Max.   :15.00   Max.   :5.138
#q6: Display enough rows to see examples of all of steps 1-5 above.
# head() returns at most the requested number of rows, so it never pads the
# output with NA rows when the data frame is shorter than expected.
head(theDF, n = 15)
##    Updated Name2 Updated Time2 Updated Cal2 Remarks
## 4              4          1.30      1.77967     Bad
## 7              7          2.40      1.75136     Bad
## 8              8          2.40      1.27497     Bad
## 9              9          2.40      1.17332     Bad
## 10            10          4.00      3.12273    Good
## 11            11          4.00      2.60958     Bad
## 12            12          4.00      2.57429     Bad
## 13            13          6.10      3.17881    Good
## 14            14          6.10      3.00782    Good
## 15            15          6.10      2.67061     Bad
## 16            16          8.05      3.05959    Good
## 17            17          8.05      3.94321    Good
## 18            18          8.05      3.43726    Good
## 19            19         11.15      4.80735    Good
## 20            20         11.15      3.35583    Good