#Location of the datasets catalogue. This absolute path is specific to the
#original author's machine; point csv_path at your own copy to run elsewhere.
csv_path <- "C:/CUNY/Assignments/Workshop/R/week2/datasets.csv"
#Remote alternative. Use the raw.githubusercontent.com form: the github.com
#".../blob/..." address serves an HTML page, not the CSV itself.
#NOTE(review): raw URL derived from the original blob link - confirm it resolves.
#csv_path <- "https://raw.githubusercontent.com/san123i/CUNY/master/Assignments_workshop/datasets.csv"

#Fail early with an actionable message when a local path does not exist
if (!grepl("^https?://", csv_path) && !file.exists(csv_path)) {
  stop("datasets.csv not found at: ", csv_path, call. = FALSE)
}
df <- read.csv(csv_path, header = TRUE, stringsAsFactors = FALSE)

#Get summary of the initial data set
summary(df)

##    Package              Item              Title                Rows       
##  Length:1243        Length:1243        Length:1243        Min.   :     0  
##  Class :character   Class :character   Class :character   1st Qu.:    30  
##  Mode  :character   Mode  :character   Mode  :character   Median :    90  
##                                                           Mean   :  1576  
##                                                           3rd Qu.:   451  
##                                                           Max.   :372864  
##       Cols         has_logical     has_binary      has_numeric    
##  Min.   :   1.00   Mode :logical   Mode :logical   Mode :logical  
##  1st Qu.:   3.00   FALSE:1233      FALSE:717       FALSE:329      
##  Median :   5.00   TRUE :10        TRUE :526       TRUE :914      
##  Mean   :  15.46                                                  
##  3rd Qu.:   9.00                                                  
##  Max.   :6831.00                                                  
##  has_character       CSV                Doc           
##  Mode :logical   Length:1243        Length:1243       
##  FALSE:1190      Class :character   Class :character  
##  TRUE :53        Mode  :character   Mode  :character  
##                                                       
##                                                       
##

#Average of the 'Rows' column over the full data set
mean(df[["Rows"]])

## [1] 1575.697

#Average of the 'Cols' column over the full data set
mean(df[["Cols"]])

## [1] 15.465

#Median of the 'Rows' column over the full data set
median(df[["Rows"]])

## [1] 90

#Median of the 'Cols' column over the full data set
median(df[["Cols"]])

## [1] 5

#Create a new dataset from the existing data set: the first 20 rows of the
#Title, Rows and Cols columns. Columns are picked by name so the selection
#does not depend on their position, and head() returns fewer rows instead of
#NA-filled padding when df has fewer than 20 rows.
subset_frame <- head(df[c("Title", "Rows", "Cols")], 20)
subset_frame

##                                              Title Rows Cols
## 1                           Monthly Excess Returns   60    3
## 2     Delay in AIDS Reporting in England and Wales  570    6
## 3           Failures of Air-conditioning Equipment   12    1
## 4           Failures of Air-conditioning Equipment   24    1
## 5                   Car Speeding and Warning Signs 8437    4
## 6  Remission Times for Acute Myelogenous Leukaemia   23    3
## 7                     Beaver Body Temperature Data  100    4
## 8                        Population of U.S. Cities   49    2
## 9                Spatial Location of Bramble Canes  823    3
## 10                    Smoking Deaths Among Doctors   10    5
## 11                             Calcium Uptake Data   27    2
## 12                         Sugar-cane Disease Data  180    5
## 13            Simulated Manufacturing Process Data   75    1
## 14                   Weight Data for Domestic Cats   97    3
## 15                     Position of Muscle Caveolae  138    2
## 16            CD4 Counts for HIV-Positive Patients   20    2
## 17                    Nested Bootstrap of cd4 data  999    2
## 18                             Channing House Data  462    5
## 19                       Population of U.S. Cities   10    2
## 20                Genetic Links to Left-handedness   37    2

#Append a column named 'NewColumn' to the subset, with every entry set to NA
subset_frame[["NewColumn"]] <- NA
subset_frame

##                                              Title Rows Cols NewColumn
## 1                           Monthly Excess Returns   60    3        NA
## 2     Delay in AIDS Reporting in England and Wales  570    6        NA
## 3           Failures of Air-conditioning Equipment   12    1        NA
## 4           Failures of Air-conditioning Equipment   24    1        NA
## 5                   Car Speeding and Warning Signs 8437    4        NA
## 6  Remission Times for Acute Myelogenous Leukaemia   23    3        NA
## 7                     Beaver Body Temperature Data  100    4        NA
## 8                        Population of U.S. Cities   49    2        NA
## 9                Spatial Location of Bramble Canes  823    3        NA
## 10                    Smoking Deaths Among Doctors   10    5        NA
## 11                             Calcium Uptake Data   27    2        NA
## 12                         Sugar-cane Disease Data  180    5        NA
## 13            Simulated Manufacturing Process Data   75    1        NA
## 14                   Weight Data for Domestic Cats   97    3        NA
## 15                     Position of Muscle Caveolae  138    2        NA
## 16            CD4 Counts for HIV-Positive Patients   20    2        NA
## 17                    Nested Bootstrap of cd4 data  999    2        NA
## 18                             Channing House Data  462    5        NA
## 19                       Population of U.S. Cities   10    2        NA
## 20                Genetic Links to Left-handedness   37    2        NA

#Summarise every column of the subset, including the all-NA 'NewColumn'
summary(object = subset_frame)

##     Title                Rows              Cols     NewColumn     
##  Length:20          Min.   :  10.00   Min.   :1.0   Mode:logical  
##  Class :character   1st Qu.:  23.75   1st Qu.:2.0   NA's:20       
##  Mode  :character   Median :  67.50   Median :2.5                 
##                     Mean   : 607.65   Mean   :2.9                 
##                     3rd Qu.: 250.50   3rd Qu.:4.0                 
##                     Max.   :8437.00   Max.   :6.0

#Average of the 'Rows' column within the subset
mean(subset_frame[["Rows"]])

## [1] 607.65

#Average of the 'Cols' column within the subset
mean(subset_frame[["Cols"]])

## [1] 2.9

#Median of the 'Rows' column within the subset
median(subset_frame[["Rows"]])

## [1] 67.5

#Median of the 'Cols' column within the subset
median(subset_frame[["Cols"]])

## [1] 2.5

#Overwrite every entry of 'NewColumn' in the subset with 'Excellent_Super'
subset_frame[["NewColumn"]] <- "Excellent_Super"
subset_frame

##                                              Title Rows Cols
## 1                           Monthly Excess Returns   60    3
## 2     Delay in AIDS Reporting in England and Wales  570    6
## 3           Failures of Air-conditioning Equipment   12    1
## 4           Failures of Air-conditioning Equipment   24    1
## 5                   Car Speeding and Warning Signs 8437    4
## 6  Remission Times for Acute Myelogenous Leukaemia   23    3
## 7                     Beaver Body Temperature Data  100    4
## 8                        Population of U.S. Cities   49    2
## 9                Spatial Location of Bramble Canes  823    3
## 10                    Smoking Deaths Among Doctors   10    5
## 11                             Calcium Uptake Data   27    2
## 12                         Sugar-cane Disease Data  180    5
## 13            Simulated Manufacturing Process Data   75    1
## 14                   Weight Data for Domestic Cats   97    3
## 15                     Position of Muscle Caveolae  138    2
## 16            CD4 Counts for HIV-Positive Patients   20    2
## 17                    Nested Bootstrap of cd4 data  999    2
## 18                             Channing House Data  462    5
## 19                       Population of U.S. Cities   10    2
## 20                Genetic Links to Left-handedness   37    2
##          NewColumn
## 1  Excellent_Super
## 2  Excellent_Super
## 3  Excellent_Super
## 4  Excellent_Super
## 5  Excellent_Super
## 6  Excellent_Super
## 7  Excellent_Super
## 8  Excellent_Super
## 9  Excellent_Super
## 10 Excellent_Super
## 11 Excellent_Super
## 12 Excellent_Super
## 13 Excellent_Super
## 14 Excellent_Super
## 15 Excellent_Super
## 16 Excellent_Super
## 17 Excellent_Super
## 18 Excellent_Super
## 19 Excellent_Super
## 20 Excellent_Super