# Load the Rdatasets index CSV from its published URL.
# Offline alternative (local download), kept for reference:
# datasets <- read.csv(file="/Users/HR/Downloads/datasets.csv", header=TRUE, sep=",")
datasets <- read.csv(
  "http://vincentarelbundock.github.io/Rdatasets/datasets.csv",
  header = TRUE,
  sep = ","
)
1. Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.
# Per-column overview of the loaded index: level counts for categorical
# columns, quantiles and mean for numeric ones, TRUE/FALSE tallies for flags.
summary(object = datasets)
## Package Item
## Ecdat :130 lung : 3
## DAAG :121 aids : 2
## Stat2Data:119 channing: 2
## MASS : 87 Cigar : 2
## datasets : 84 cities : 2
## carData : 59 Clothing: 2
## (Other) :643 (Other) :1230
## Title
## Labour Training Evaluation Data : 11
## Seven data sets showing a bifactor solution. : 9
## Individual Preferences Over Immigration Policy : 6
## John Snow's Map and Data on the 1854 London Cholera Outbreak : 5
## Rain, wavesurge, portpirie and nidd datasets. : 4
## Australian and Related Historical Annual Climate Data, by region: 3
## (Other) :1205
## Rows Cols has_logical has_binary
## Min. : 0 Min. : 1.00 Mode :logical Mode :logical
## 1st Qu.: 30 1st Qu.: 3.00 FALSE:1233 FALSE:717
## Median : 90 Median : 5.00 TRUE :10 TRUE :526
## Mean : 1576 Mean : 15.46
## 3rd Qu.: 451 3rd Qu.: 9.00
## Max. :372864 Max. :6831.00
##
## has_numeric has_character
## Mode :logical Mode :logical
## FALSE:329 FALSE:1190
## TRUE :914 TRUE :53
##
##
##
##
## CSV
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/acme.csv : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aids.csv : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit.csv : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit7.csv: 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/amis.csv : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aml.csv : 1
## (Other) :1237
## Doc
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/acme.html : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aids.html : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit.html : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit7.html: 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/amis.html : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aml.html : 1
## (Other) :1237
1. Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.
# Mean of the Rows column
mean(datasets[["Rows"]])
## [1] 1575.697
# Mean of the Cols column
mean(datasets[["Cols"]])
## [1] 15.465
# Median of the Rows column
median(datasets[["Rows"]])
## [1] 90
# Median of the Cols column
median(datasets[["Cols"]])
## [1] 5
2. Create a new data frame with a subset of the columns and rows. Make sure to rename it.
# Keep only the entries that describe a single-column data set with at least
# one row. which() drops comparisons that evaluate to NA, as subset() does.
boots <- datasets[which(datasets$Cols == 1 & datasets$Rows >= 1), , drop = FALSE]
3. Create new column names for the new data frame.
# Rename the first nine columns only. The frame has eleven columns, and
# assigning a nine-element vector to colnames(boots) as a whole would leave
# the last two (CSV and Doc) with NA as their name; indexing the target keeps
# their original names intact.
colnames(boots)[1:9] <- c(
  "X1", "X2", "X3", "X4", "X5",
  "Okay", "Fine", "Good", "Great"
)
4. Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.
# Per-column overview of the filtered, renamed frame
summary(object = boots)
## X1 X2
## boot :8 SP500 : 2
## evir :7 abbey : 1
## MASS :7 aircondit : 1
## DAAG :4 aircondit7: 1
## datasets :4 bmw : 1
## robustbase:2 bostonc : 1
## (Other) :8 (Other) :33
## X3 X4
## Failures of Air-conditioning Equipment : 2 Min. : 10.00
## Rain, wavesurge, portpirie and nidd datasets.: 2 1st Qu.: 30.75
## The River Nidd Data : 2 Median : 91.00
## Annual Precipitation in US Cities : 1 Mean : 2253.55
## Areas of the World's Major Landmasses : 1 3rd Qu.: 2320.25
## Boston Housing Data - Corrected : 1 Max. :27716.00
## (Other) :31
## X5 Okay Fine Good
## Min. :1 Mode :logical Mode :logical Mode :logical
## 1st Qu.:1 FALSE:40 FALSE:40 FALSE:7
## Median :1 TRUE :33
## Mean :1
## 3rd Qu.:1
## Max. :1
##
## Great
## Mode :logical
## FALSE:40
##
##
##
##
##
## NA
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit.csv : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit7.csv: 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/capability.csv: 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/coal.csv : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/darwin.csv : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/islay.csv : 1
## (Other) :34
## NA
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit.html : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit7.html: 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/capability.html: 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/coal.html : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/darwin.html : 1
## https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/islay.html : 1
## (Other) :34
4. Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.
# Mean of X4 (the renamed Rows column)
mean(boots[["X4"]])
## [1] 2253.55
# Mean of X5 (the renamed Cols column)
mean(boots[["X5"]])
## [1] 1
# Median of X4
median(boots[["X4"]])
## [1] 91
# Median of X5
median(boots[["X5"]])
## [1] 1
5. For at least 3 values in a column, please rename them so that every value in that column is renamed.
# Convert Great to character. The column holds logical flags, so the
# resulting strings are "TRUE"/"FALSE" -- never "1".
boots$Great <- as.character(boots$Great)
# Relabel every "FALSE" entry as "nice". Matching against "1" (as before)
# never selected anything, so no value was actually renamed.
# %in% is used instead of == so NA entries are simply left untouched.
boots$Great[boots$Great %in% "FALSE"] <- "nice"
6. Display enough rows to see examples of all of steps 1-5 above.
# Reorder the rows of boots by the values in its Great column
example <- boots[order(boots[["Great"]]), ]
# Show the final ten rows of the reordered frame
tail(x = example, n = 10)
## X1 X2
## 778 MASS galaxies
## 796 MASS newcomb
## 815 MASS shrimp
## 821 MASS SP500
## 848 mosaicData Cards
## 926 psych epi.dictionary
## 980 robustbase cushny
## 990 robustbase los
## 1169 texmex nidd
## 1171 texmex rain
## X3 X4
## 778 Velocities for 82 Galaxies 82
## 796 Newcomb's Measurements of the Passage Time of Light 66
## 815 Percentage of Shrimp in Shrimp Cocktail 18
## 821 Returns of the Standard and Poors 500 2780
## 848 Standard Deck of Cards 52
## 926 Eysenck Personality Inventory (EPI) data for 3570 participants 57
## 980 Cushny and Peebles Prolongation of Sleep Data 10
## 990 Length of Stay Data 201
## 1169 Rain, wavesurge, portpirie and nidd datasets. 154
## 1171 Rain, wavesurge, portpirie and nidd datasets. 17531
## X5 Okay Fine Good Great
## 778 1 FALSE FALSE TRUE FALSE
## 796 1 FALSE FALSE TRUE FALSE
## 815 1 FALSE FALSE TRUE FALSE
## 821 1 FALSE FALSE TRUE FALSE
## 848 1 FALSE FALSE FALSE FALSE
## 926 1 FALSE FALSE FALSE FALSE
## 980 1 FALSE FALSE TRUE FALSE
## 990 1 FALSE FALSE FALSE FALSE
## 1169 1 FALSE FALSE TRUE FALSE
## 1171 1 FALSE FALSE TRUE FALSE
## NA
## 778 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/MASS/galaxies.csv
## 796 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/MASS/newcomb.csv
## 815 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/MASS/shrimp.csv
## 821 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/MASS/SP500.csv
## 848 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/mosaicData/Cards.csv
## 926 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/psych/epi.dictionary.csv
## 980 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/robustbase/cushny.csv
## 990 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/robustbase/los.csv
## 1169 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/texmex/nidd.csv
## 1171 https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/texmex/rain.csv
## NA
## 778 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/MASS/galaxies.html
## 796 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/MASS/newcomb.html
## 815 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/MASS/shrimp.html
## 821 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/MASS/SP500.html
## 848 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/mosaicData/Cards.html
## 926 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/psych/epi.dictionary.html
## 980 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/robustbase/cushny.html
## 990 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/robustbase/los.html
## 1169 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/texmex/nidd.html
## 1171 https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/texmex/rain.html
7. BONUS - Place the original .csv in a GitHub file and have R read from the link. This will be a very useful skill as you progress in your data science education and career.
# Load the Rdatasets index CSV directly from its web link.
# Offline alternative (local download), kept for reference:
# datasets <- read.csv(file="/Users/HR/Downloads/datasets.csv", header=TRUE, sep=",")
datasets <- read.csv(
  "http://vincentarelbundock.github.io/Rdatasets/datasets.csv",
  header = TRUE,
  sep = ","
)
My RPubs link is http://rpubs.com/Zchen116/455491