A list of the available data sets is published at http://vincentarelbundock.github.io/Rdatasets/ (follow the CSV index link on that page for the full list).
# Read CSV into R from local
# Locate datasets.csv inside the current working directory. file.path() joins
# the pieces with the path separator instead of hand-pasting a "/".
myWorkingDir <- getwd()
#myWorkingDir
mySourceFile <- file.path(myWorkingDir, "datasets.csv")
#mySourceFile
# The file is comma-separated and its first line holds the column names.
myLocalData <- read.csv(file = mySourceFile, header = TRUE, sep = ",")
# Preview the first rows of the loaded data.
head(myLocalData)
## Package Item Title Rows
## 1 boot acme Monthly Excess Returns 60
## 2 boot aids Delay in AIDS Reporting in England and Wales 570
## 3 boot aircondit Failures of Air-conditioning Equipment 12
## 4 boot aircondit7 Failures of Air-conditioning Equipment 24
## 5 boot amis Car Speeding and Warning Signs 8437
## 6 boot aml Remission Times for Acute Myelogenous Leukaemia 23
## Cols has_logical has_binary has_numeric has_character
## 1 3 FALSE FALSE TRUE TRUE
## 2 6 FALSE TRUE TRUE FALSE
## 3 1 FALSE FALSE TRUE FALSE
## 4 1 FALSE FALSE TRUE FALSE
## 5 4 FALSE TRUE TRUE FALSE
## 6 3 FALSE TRUE TRUE FALSE
## CSV
## 1 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/acme.csv
## 2 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aids.csv
## 3 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit.csv
## 4 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit7.csv
## 5 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/amis.csv
## 6 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aml.csv
## Doc
## 1 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/acme.html
## 2 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aids.html
## 3 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit.html
## 4 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit7.html
## 5 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/amis.html
## 6 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aml.html
# First few values of the Item column, extracted by exact name.
head(myLocalData[["Item"]])
## [1] acme aids aircondit aircondit7 amis aml
## 1200 Levels: a10 abbey ability ability.cov absentee accdeaths ... Zelig.url
# Install readr only when it is not already available, so that re-running the
# script does not download and reinstall the package every time.
# NOTE(review): readr is not used in the visible part of this script; confirm
# it is still needed.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr", repos = "http://cran.us.r-project.org")
}
## package 'readr' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Debabrata\AppData\Local\Temp\Rtmp8WN1qp\downloaded_packages
# Per-column summary of myLocalData.
summary(myLocalData)
## Package Item
## Ecdat :130 lung : 3
## DAAG :121 aids : 2
## Stat2Data:119 channing: 2
## MASS : 87 Cigar : 2
## datasets : 84 cities : 2
## carData : 59 Clothing: 2
## (Other) :643 (Other) :1230
## Title
## Labour Training Evaluation Data : 11
## Seven data sets showing a bifactor solution. : 9
## Individual Preferences Over Immigration Policy : 6
## John Snow's Map and Data on the 1854 London Cholera Outbreak : 5
## Rain, wavesurge, portpirie and nidd datasets. : 4
## Australian and Related Historical Annual Climate Data, by region: 3
## (Other) :1205
## Rows Cols has_logical has_binary
## Min. : 0 Min. : 1.00 Mode :logical Mode :logical
## 1st Qu.: 30 1st Qu.: 3.00 FALSE:1233 FALSE:717
## Median : 90 Median : 5.00 TRUE :10 TRUE :526
## Mean : 1576 Mean : 15.46
## 3rd Qu.: 451 3rd Qu.: 9.00
## Max. :372864 Max. :6831.00
##
## has_numeric has_character
## Mode :logical Mode :logical
## FALSE:329 FALSE:1190
## TRUE :914 TRUE :53
##
##
##
##
## CSV
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/acme.csv : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aids.csv : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit.csv : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit7.csv: 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/amis.csv : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aml.csv : 1
## (Other) :1237
## Doc
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/acme.html : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aids.html : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit.html : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit7.html: 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/amis.html : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aml.html : 1
## (Other) :1237
# The same column is extracted four ways below: backtick-quoted `$`, plain `$`,
# [, "name"] indexing, and [["name"]]. Each form yields the same vector.
# Mean of the Rows column.
mean(myLocalData$`Rows`)
## [1] 1575.697
mean(myLocalData$Rows, na.rm = TRUE) # 'na.rm = TRUE' removes missing values before calculating the mean
## [1] 1575.697
mean(myLocalData[, "Rows"], na.rm = TRUE) # 'na.rm = TRUE' removes missing values before calculating the mean
## [1] 1575.697
mean(myLocalData[["Rows"]])
## [1] 1575.697
# Mean of the Cols column, followed by its median.
mean(myLocalData$`Cols`)
## [1] 15.465
median(myLocalData$Cols, na.rm = TRUE) # 'na.rm = TRUE' removes missing values before calculating the median
## [1] 5
median(myLocalData[, "Cols"], na.rm = TRUE) # 'na.rm = TRUE' removes missing values before calculating the median
## [1] 5
median(myLocalData[["Cols"]])
## [1] 5
# Keep rows 2 through 10 and four of the columns, relabelling the columns in
# the same step.
myDataFrame <- setNames(
  myLocalData[2:10, c("Item", "Title", "Rows", "Cols")],
  c("Product", "Type", "Records#", "Attributes#")
)
# Show the resulting subset.
myDataFrame
## Product Type Records#
## 2 aids Delay in AIDS Reporting in England and Wales 570
## 3 aircondit Failures of Air-conditioning Equipment 12
## 4 aircondit7 Failures of Air-conditioning Equipment 24
## 5 amis Car Speeding and Warning Signs 8437
## 6 aml Remission Times for Acute Myelogenous Leukaemia 23
## 7 beaver Beaver Body Temperature Data 100
## 8 bigcity Population of U.S. Cities 49
## 9 brambles Spatial Location of Bramble Canes 823
## 10 breslow Smoking Deaths Among Doctors 10
## Attributes#
## 2 6
## 3 1
## 4 1
## 5 4
## 6 3
## 7 4
## 8 2
## 9 3
## 10 5
# Replace the column labels with names free of the '#' character.
names(myDataFrame) <- c(
  "ProductName",
  "TypeName",
  "RecordsNumber",
  "AttributesNumber"
)
# Show the frame under its new column names.
myDataFrame
## ProductName TypeName
## 2 aids Delay in AIDS Reporting in England and Wales
## 3 aircondit Failures of Air-conditioning Equipment
## 4 aircondit7 Failures of Air-conditioning Equipment
## 5 amis Car Speeding and Warning Signs
## 6 aml Remission Times for Acute Myelogenous Leukaemia
## 7 beaver Beaver Body Temperature Data
## 8 bigcity Population of U.S. Cities
## 9 brambles Spatial Location of Bramble Canes
## 10 breslow Smoking Deaths Among Doctors
## RecordsNumber AttributesNumber
## 2 570 6
## 3 12 1
## 4 24 1
## 5 8437 4
## 6 23 3
## 7 100 4
## 8 49 2
## 9 823 3
## 10 10 5
# Per-column summary of the nine-row subset.
summary(myDataFrame)
## ProductName TypeName
## aids :1 Failures of Air-conditioning Equipment :2
## aircondit :1 Beaver Body Temperature Data :1
## aircondit7:1 Car Speeding and Warning Signs :1
## amis :1 Delay in AIDS Reporting in England and Wales :1
## aml :1 Population of U.S. Cities :1
## beaver :1 Remission Times for Acute Myelogenous Leukaemia:1
## (Other) :3 (Other) :2
## RecordsNumber AttributesNumber
## Min. : 10 Min. :1.000
## 1st Qu.: 23 1st Qu.:2.000
## Median : 49 Median :3.000
## Mean :1116 Mean :3.222
## 3rd Qu.: 570 3rd Qu.:4.000
## Max. :8437 Max. :6.000
##
# The same column is extracted four ways below: backtick-quoted `$`, plain `$`,
# [, "name"] indexing, and [["name"]]. Each form yields the same vector.
# Mean of the RecordsNumber column.
mean(myDataFrame$`RecordsNumber`)
## [1] 1116.444
mean(myDataFrame$RecordsNumber, na.rm = TRUE) # 'na.rm = TRUE' removes missing values before calculating the mean
## [1] 1116.444
mean(myDataFrame[, "RecordsNumber"], na.rm = TRUE) # 'na.rm = TRUE' removes missing values before calculating the mean
## [1] 1116.444
mean(myDataFrame[["RecordsNumber"]])
## [1] 1116.444
# Median of the AttributesNumber column.
median(myDataFrame$`AttributesNumber`)
## [1] 3
median(myDataFrame$AttributesNumber, na.rm = TRUE) # 'na.rm = TRUE' removes missing values before calculating the median
## [1] 3
median(myDataFrame[, "AttributesNumber"], na.rm = TRUE) # 'na.rm = TRUE' removes missing values before calculating the median
## [1] 3
median(myDataFrame[["AttributesNumber"]])
## [1] 3
# Swap every occurrence of 'Failures' for 'Errors' in the TypeName column.
myDataFrame[["TypeName"]] <- gsub(
  pattern = 'Failures',
  replacement = 'Errors',
  x = myDataFrame[["TypeName"]]
)
# Show the frame after the replacement.
myDataFrame
## ProductName TypeName
## 2 aids Delay in AIDS Reporting in England and Wales
## 3 aircondit Errors of Air-conditioning Equipment
## 4 aircondit7 Errors of Air-conditioning Equipment
## 5 amis Car Speeding and Warning Signs
## 6 aml Remission Times for Acute Myelogenous Leukaemia
## 7 beaver Beaver Body Temperature Data
## 8 bigcity Population of U.S. Cities
## 9 brambles Spatial Location of Bramble Canes
## 10 breslow Smoking Deaths Among Doctors
## RecordsNumber AttributesNumber
## 2 570 6
## 3 12 1
## 4 24 1
## 5 8437 4
## 6 23 3
## 7 100 4
## 8 49 2
## 9 823 3
## 10 10 5
# Print myDataFrame again; no statement has modified it since the previous print.
myDataFrame
## ProductName TypeName
## 2 aids Delay in AIDS Reporting in England and Wales
## 3 aircondit Errors of Air-conditioning Equipment
## 4 aircondit7 Errors of Air-conditioning Equipment
## 5 amis Car Speeding and Warning Signs
## 6 aml Remission Times for Acute Myelogenous Leukaemia
## 7 beaver Beaver Body Temperature Data
## 8 bigcity Population of U.S. Cities
## 9 brambles Spatial Location of Bramble Canes
## 10 breslow Smoking Deaths Among Doctors
## RecordsNumber AttributesNumber
## 2 570 6
## 3 12 1
## 4 24 1
## 5 8437 4
## 6 23 3
## 7 100 4
## 8 49 2
## 9 823 3
## 10 10 5
# Read CSV into R from GitHub
# Install RCurl only when it is not already available, so that re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("RCurl", quietly = TRUE)) {
  install.packages("RCurl", repos = "http://cran.us.r-project.org")
}
## package 'RCurl' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Debabrata\AppData\Local\Temp\Rtmp8WN1qp\downloaded_packages
library(RCurl)
## Loading required package: bitops
# Raw-file URL of the datasets.csv copy stored on GitHub.
myGitHubURL <- "https://raw.githubusercontent.com/destination4debabrata/CUNY-Assignments/master/Week%202%20%5BJan%202%20-%20Jan%209%5D/R/datasets.csv"
# Download the file. Despite its name, myGitHubDataURL holds the value returned
# by getURL() (the downloaded text), which is then parsed below; it is not a URL.
myGitHubDataURL <- getURL(myGitHubURL)
# Parse the downloaded text as CSV.
myGitHubData <- read.csv(text = myGitHubDataURL)
#myGitHubData
# Preview the first rows of the downloaded data.
head(myGitHubData)
## Package Item Title Rows
## 1 boot acme Monthly Excess Returns 60
## 2 boot aids Delay in AIDS Reporting in England and Wales 570
## 3 boot aircondit Failures of Air-conditioning Equipment 12
## 4 boot aircondit7 Failures of Air-conditioning Equipment 24
## 5 boot amis Car Speeding and Warning Signs 8437
## 6 boot aml Remission Times for Acute Myelogenous Leukaemia 23
## Cols has_logical has_binary has_numeric has_character
## 1 3 FALSE FALSE TRUE TRUE
## 2 6 FALSE TRUE TRUE FALSE
## 3 1 FALSE FALSE TRUE FALSE
## 4 1 FALSE FALSE TRUE FALSE
## 5 4 FALSE TRUE TRUE FALSE
## 6 3 FALSE TRUE TRUE FALSE
## CSV
## 1 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/acme.csv
## 2 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aids.csv
## 3 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit.csv
## 4 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit7.csv
## 5 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/amis.csv
## 6 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aml.csv
## Doc
## 1 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/acme.html
## 2 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aids.html
## 3 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit.html
## 4 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit7.html
## 5 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/amis.html
## 6 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aml.html
#myGitHubData$Item
# First few values of the Item column, extracted by exact name.
head(myGitHubData[["Item"]])
## [1] acme aids aircondit aircondit7 amis aml
## 1200 Levels: a10 abbey ability ability.cov absentee accdeaths ... Zelig.url