# Load the dataset catalogue into `df`.
# The local copy is used when it exists on this machine; otherwise the script
# falls back to the raw file on GitHub so it can still run elsewhere.
# A github.com ".../blob/..." address serves an HTML page rather than the CSV
# itself, so read.csv() needs the raw.githubusercontent.com form of the link.
local_csv <- "C:/CUNY/Assignments/Workshop/R/week2/datasets.csv"
# NOTE(review): raw URL derived from the original blob link - confirm it
# resolves before relying on the fallback.
remote_csv <- paste0(
  "https://raw.githubusercontent.com/san123i/CUNY/master/",
  "Assignments_workshop/datasets.csv"
)
csv_source <- if (file.exists(local_csv)) local_csv else remote_csv
df <- read.csv(csv_source, header = TRUE, stringsAsFactors = FALSE)
# Get summary of the initial data set
summary(df)
## Package Item Title Rows
## Length:1243 Length:1243 Length:1243 Min. : 0
## Class :character Class :character Class :character 1st Qu.: 30
## Mode :character Mode :character Mode :character Median : 90
## Mean : 1576
## 3rd Qu.: 451
## Max. :372864
## Cols has_logical has_binary has_numeric
## Min. : 1.00 Mode :logical Mode :logical Mode :logical
## 1st Qu.: 3.00 FALSE:1233 FALSE:717 FALSE:329
## Median : 5.00 TRUE :10 TRUE :526 TRUE :914
## Mean : 15.46
## 3rd Qu.: 9.00
## Max. :6831.00
## has_character CSV Doc
## Mode :logical Length:1243 Length:1243
## FALSE:1190 Class :character Class :character
## TRUE :53 Mode :character Mode :character
##
##
##
# Mean of the 'Rows' column across the full data set
mean(df[["Rows"]])
## [1] 1575.697
# Mean of the 'Cols' column across the full data set
mean(df[["Cols"]])
## [1] 15.465
# Median of the 'Rows' column across the full data set
median(df[["Rows"]])
## [1] 90
# Median of the 'Cols' column across the full data set
median(df[["Cols"]])
## [1] 5
# Create a new data set from the existing one: the first 20 records, keeping
# only the Title, Rows and Cols columns.
# Columns are picked by name so the selection does not depend on the CSV's
# column order, and head() returns just the available rows instead of padding
# with all-NA rows when the source has fewer than 20 records.
subset_frame <- head(df[c("Title", "Rows", "Cols")], n = 20L)
subset_frame
## Title Rows Cols
## 1 Monthly Excess Returns 60 3
## 2 Delay in AIDS Reporting in England and Wales 570 6
## 3 Failures of Air-conditioning Equipment 12 1
## 4 Failures of Air-conditioning Equipment 24 1
## 5 Car Speeding and Warning Signs 8437 4
## 6 Remission Times for Acute Myelogenous Leukaemia 23 3
## 7 Beaver Body Temperature Data 100 4
## 8 Population of U.S. Cities 49 2
## 9 Spatial Location of Bramble Canes 823 3
## 10 Smoking Deaths Among Doctors 10 5
## 11 Calcium Uptake Data 27 2
## 12 Sugar-cane Disease Data 180 5
## 13 Simulated Manufacturing Process Data 75 1
## 14 Weight Data for Domestic Cats 97 3
## 15 Position of Muscle Caveolae 138 2
## 16 CD4 Counts for HIV-Positive Patients 20 2
## 17 Nested Bootstrap of cd4 data 999 2
## 18 Channing House Data 462 5
## 19 Population of U.S. Cities 10 2
## 20 Genetic Links to Left-handedness 37 2
# Append a placeholder column named NewColumn; every row is set to NA
subset_frame[["NewColumn"]] <- NA
print(subset_frame)
## Title Rows Cols NewColumn
## 1 Monthly Excess Returns 60 3 NA
## 2 Delay in AIDS Reporting in England and Wales 570 6 NA
## 3 Failures of Air-conditioning Equipment 12 1 NA
## 4 Failures of Air-conditioning Equipment 24 1 NA
## 5 Car Speeding and Warning Signs 8437 4 NA
## 6 Remission Times for Acute Myelogenous Leukaemia 23 3 NA
## 7 Beaver Body Temperature Data 100 4 NA
## 8 Population of U.S. Cities 49 2 NA
## 9 Spatial Location of Bramble Canes 823 3 NA
## 10 Smoking Deaths Among Doctors 10 5 NA
## 11 Calcium Uptake Data 27 2 NA
## 12 Sugar-cane Disease Data 180 5 NA
## 13 Simulated Manufacturing Process Data 75 1 NA
## 14 Weight Data for Domestic Cats 97 3 NA
## 15 Position of Muscle Caveolae 138 2 NA
## 16 CD4 Counts for HIV-Positive Patients 20 2 NA
## 17 Nested Bootstrap of cd4 data 999 2 NA
## 18 Channing House Data 462 5 NA
## 19 Population of U.S. Cities 10 2 NA
## 20 Genetic Links to Left-handedness 37 2 NA
# Show per-column summary statistics for the reduced data set
print(summary(subset_frame))
## Title Rows Cols NewColumn
## Length:20 Min. : 10.00 Min. :1.0 Mode:logical
## Class :character 1st Qu.: 23.75 1st Qu.:2.0 NA's:20
## Mode :character Median : 67.50 Median :2.5
## Mean : 607.65 Mean :2.9
## 3rd Qu.: 250.50 3rd Qu.:4.0
## Max. :8437.00 Max. :6.0
# Mean of the 'Rows' column in the reduced data set
mean(subset_frame[["Rows"]])
## [1] 607.65
# Mean of the 'Cols' column in the reduced data set
mean(subset_frame[["Cols"]])
## [1] 2.9
# Median of the 'Rows' column in the reduced data set
median(subset_frame[["Rows"]])
## [1] 67.5
# Median of the 'Cols' column in the reduced data set
median(subset_frame[["Cols"]])
## [1] 2.5
# Overwrite NewColumn so that every row holds the string 'Excellent_Super'
subset_frame[["NewColumn"]] <- "Excellent_Super"
print(subset_frame)
## Title Rows Cols
## 1 Monthly Excess Returns 60 3
## 2 Delay in AIDS Reporting in England and Wales 570 6
## 3 Failures of Air-conditioning Equipment 12 1
## 4 Failures of Air-conditioning Equipment 24 1
## 5 Car Speeding and Warning Signs 8437 4
## 6 Remission Times for Acute Myelogenous Leukaemia 23 3
## 7 Beaver Body Temperature Data 100 4
## 8 Population of U.S. Cities 49 2
## 9 Spatial Location of Bramble Canes 823 3
## 10 Smoking Deaths Among Doctors 10 5
## 11 Calcium Uptake Data 27 2
## 12 Sugar-cane Disease Data 180 5
## 13 Simulated Manufacturing Process Data 75 1
## 14 Weight Data for Domestic Cats 97 3
## 15 Position of Muscle Caveolae 138 2
## 16 CD4 Counts for HIV-Positive Patients 20 2
## 17 Nested Bootstrap of cd4 data 999 2
## 18 Channing House Data 462 5
## 19 Population of U.S. Cities 10 2
## 20 Genetic Links to Left-handedness 37 2
## NewColumn
## 1 Excellent_Super
## 2 Excellent_Super
## 3 Excellent_Super
## 4 Excellent_Super
## 5 Excellent_Super
## 6 Excellent_Super
## 7 Excellent_Super
## 8 Excellent_Super
## 9 Excellent_Super
## 10 Excellent_Super
## 11 Excellent_Super
## 12 Excellent_Super
## 13 Excellent_Super
## 14 Excellent_Super
## 15 Excellent_Super
## 16 Excellent_Super
## 17 Excellent_Super
## 18 Excellent_Super
## 19 Excellent_Super
## 20 Excellent_Super