# Read the Rdatasets catalogue CSV into R.
# Alternative: read from a local copy instead of the web.
# datasets <- read.csv(file="/Users/HR/Downloads/datasets.csv", header=TRUE, sep=",")
# stringsAsFactors is set explicitly: its default changed from TRUE to FALSE
# in R 4.0.0, and the factor-style summaries recorded in this file (level
# counts for Package, Item, Title, ...) are only produced when it is TRUE.
datasets <- read.csv(
  file = "http://vincentarelbundock.github.io/Rdatasets/datasets.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

1,Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.

# Summarize every column of the catalogue. In the output recorded below this
# gives level counts for the text columns, quartiles and mean for the numeric
# columns (Rows, Cols), and FALSE/TRUE tallies for the logical has_* columns.
summary(datasets)
##       Package          Item     
##  Ecdat    :130   lung    :   3  
##  DAAG     :121   aids    :   2  
##  Stat2Data:119   channing:   2  
##  MASS     : 87   Cigar   :   2  
##  datasets : 84   cities  :   2  
##  carData  : 59   Clothing:   2  
##  (Other)  :643   (Other) :1230  
##                                                               Title     
##  Labour Training Evaluation Data                                 :  11  
##  Seven data sets showing a bifactor solution.                    :   9  
##  Individual Preferences Over Immigration Policy                  :   6  
##  John Snow's Map and Data on the 1854 London Cholera Outbreak    :   5  
##  Rain, wavesurge, portpirie and nidd datasets.                   :   4  
##  Australian and Related Historical Annual Climate Data, by region:   3  
##  (Other)                                                         :1205  
##       Rows             Cols         has_logical     has_binary     
##  Min.   :     0   Min.   :   1.00   Mode :logical   Mode :logical  
##  1st Qu.:    30   1st Qu.:   3.00   FALSE:1233      FALSE:717      
##  Median :    90   Median :   5.00   TRUE :10        TRUE :526      
##  Mean   :  1576   Mean   :  15.46                                  
##  3rd Qu.:   451   3rd Qu.:   9.00                                  
##  Max.   :372864   Max.   :6831.00                                  
##                                                                    
##  has_numeric     has_character  
##  Mode :logical   Mode :logical  
##  FALSE:329       FALSE:1190     
##  TRUE :914       TRUE :53       
##                                 
##                                 
##                                 
##                                 
##                                                                                  CSV      
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/acme.csv      :   1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aids.csv      :   1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit.csv :   1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit7.csv:   1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/amis.csv      :   1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aml.csv       :   1  
##  (Other)                                                                           :1237  
##                                                                                   Doc      
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/acme.html      :   1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aids.html      :   1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit.html :   1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit7.html:   1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/amis.html      :   1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aml.html       :   1  
##  (Other)                                                                            :1237

1,Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.

# Mean and median of the two numeric attributes, Rows and Cols, over the
# whole catalogue. The means sit far above the medians, which matches the
# very large maxima shown in the summary (372864 rows, 6831 columns).
mean(datasets$Rows)
## [1] 1575.697
mean(datasets$Cols)
## [1] 15.465
median(datasets$Rows)
## [1] 90
median(datasets$Cols)
## [1] 5

2,Create a new data frame with a subset of the columns and rows. Make sure to rename it

# Build the subset under a new name: catalogue entries that have at least
# one row and exactly one column. which() drops any NA comparison results,
# so only rows where the condition is TRUE are kept.
boots <- data.frame(
  datasets[which(datasets$Rows >= 1 & datasets$Cols == 1), , drop = FALSE]
)

3,Create new column names for the new data frame

# Rename the first nine columns only. boots has eleven columns, and assigning
# a nine-element vector to all of the column names would leave the last two
# (CSV and Doc) named NA, which makes them impossible to select by name.
colnames(boots)[1:9] <- c(
  "X1", "X2", "X3", "X4", "X5", "Okay", "Fine", "Good", "Great"
)

4,Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes, and compare them with the values from the original data set.

# Overview of the subset, reported under its new column names.
summary(boots)
##           X1             X2    
##  boot      :8   SP500     : 2  
##  evir      :7   abbey     : 1  
##  MASS      :7   aircondit : 1  
##  DAAG      :4   aircondit7: 1  
##  datasets  :4   bmw       : 1  
##  robustbase:2   bostonc   : 1  
##  (Other)   :8   (Other)   :33  
##                                              X3           X4          
##  Failures of Air-conditioning Equipment       : 2   Min.   :   10.00  
##  Rain, wavesurge, portpirie and nidd datasets.: 2   1st Qu.:   30.75  
##  The River Nidd Data                          : 2   Median :   91.00  
##  Annual Precipitation in US Cities            : 1   Mean   : 2253.55  
##  Areas of the World's Major Landmasses        : 1   3rd Qu.: 2320.25  
##  Boston Housing Data - Corrected              : 1   Max.   :27716.00  
##  (Other)                                      :31                     
##        X5       Okay            Fine            Good        
##  Min.   :1   Mode :logical   Mode :logical   Mode :logical  
##  1st Qu.:1   FALSE:40        FALSE:40        FALSE:7        
##  Median :1                                   TRUE :33       
##  Mean   :1                                                  
##  3rd Qu.:1                                                  
##  Max.   :1                                                  
##                                                             
##    Great        
##  Mode :logical  
##  FALSE:40       
##                 
##                 
##                 
##                 
##                 
##                                                                                   NA    
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit.csv : 1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/aircondit7.csv: 1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/capability.csv: 1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/coal.csv      : 1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/darwin.csv    : 1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/boot/islay.csv     : 1  
##  (Other)                                                                           :34  
##                                                                                    NA    
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit.html : 1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/aircondit7.html: 1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/capability.html: 1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/coal.html      : 1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/darwin.html    : 1  
##  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/boot/islay.html     : 1  
##  (Other)                                                                            :34

4,Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes, and compare them with the values from the original data set.

# Mean and median of the same two attributes in the subset: X4 and X5 are
# the renamed Rows and Cols columns.
# Comparison with the full catalogue: the mean row count is higher in the
# subset (2253.55 vs 1575.697) while the median is almost unchanged
# (91 vs 90). X5 is 1 everywhere because the subset keeps only Cols == 1,
# whereas the full catalogue has mean 15.465 and median 5 for Cols.
mean(boots$X4)
## [1] 2253.55
mean(boots$X5)
## [1] 1
median(boots$X4)
## [1] 91
median(boots$X5)
## [1] 1

5,Pick a column and rename its values (at least 3 of them), so that every value in that column ends up replaced by a new name.

# Convert Great from logical to character so its values can be relabelled.
boots$Great <- as.character(boots$Great)
# as.character() turns a logical FALSE into the string "FALSE", never "1",
# so the values are matched on "FALSE" for the replacement to take effect.
# %in% is used instead of == so that any NA entries are simply left alone.
boots$Great[boots$Great %in% "FALSE"] <- "nice"

6,Display enough rows to see examples of all of steps 1-5 above.

# Reorder the subset by the values in its Great column ...
example <- boots[order(boots[["Great"]]), ]
# ... and show the final ten rows of the reordered data frame.
tail(example, n = 10L)
##              X1             X2
## 778        MASS       galaxies
## 796        MASS        newcomb
## 815        MASS         shrimp
## 821        MASS          SP500
## 848  mosaicData          Cards
## 926       psych epi.dictionary
## 980  robustbase         cushny
## 990  robustbase            los
## 1169     texmex           nidd
## 1171     texmex           rain
##                                                                  X3    X4
## 778                                      Velocities for 82 Galaxies    82
## 796             Newcomb's Measurements of the Passage Time of Light    66
## 815                         Percentage of Shrimp in Shrimp Cocktail    18
## 821                           Returns of the Standard and Poors 500  2780
## 848                                          Standard Deck of Cards    52
## 926  Eysenck Personality Inventory (EPI) data for 3570 participants    57
## 980                   Cushny and Peebles Prolongation of Sleep Data    10
## 990                                             Length of Stay Data   201
## 1169                  Rain, wavesurge, portpirie and nidd datasets.   154
## 1171                  Rain, wavesurge, portpirie and nidd datasets. 17531
##      X5  Okay  Fine  Good Great
## 778   1 FALSE FALSE  TRUE FALSE
## 796   1 FALSE FALSE  TRUE FALSE
## 815   1 FALSE FALSE  TRUE FALSE
## 821   1 FALSE FALSE  TRUE FALSE
## 848   1 FALSE FALSE FALSE FALSE
## 926   1 FALSE FALSE FALSE FALSE
## 980   1 FALSE FALSE  TRUE FALSE
## 990   1 FALSE FALSE FALSE FALSE
## 1169  1 FALSE FALSE  TRUE FALSE
## 1171  1 FALSE FALSE  TRUE FALSE
##                                                                                           NA
## 778         https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/MASS/galaxies.csv
## 796          https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/MASS/newcomb.csv
## 815           https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/MASS/shrimp.csv
## 821            https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/MASS/SP500.csv
## 848      https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/mosaicData/Cards.csv
## 926  https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/psych/epi.dictionary.csv
## 980     https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/robustbase/cushny.csv
## 990        https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/robustbase/los.csv
## 1169          https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/texmex/nidd.csv
## 1171          https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/texmex/rain.csv
##                                                                                            NA
## 778         https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/MASS/galaxies.html
## 796          https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/MASS/newcomb.html
## 815           https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/MASS/shrimp.html
## 821            https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/MASS/SP500.html
## 848      https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/mosaicData/Cards.html
## 926  https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/psych/epi.dictionary.html
## 980     https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/robustbase/cushny.html
## 990        https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/robustbase/los.html
## 1169          https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/texmex/nidd.html
## 1171          https://raw.github.com/vincentarelbundock/Rdatasets/master/doc/texmex/rain.html

7,BONUS - place the original .csv in a github file and have R read from the link. This will be a very useful skill as you progress in your data science education and career

# Read the Rdatasets catalogue CSV into R straight from its web link.
# Alternative: read from a local copy instead of the web.
# datasets <- read.csv(file="/Users/HR/Downloads/datasets.csv", header=TRUE, sep=",")
# stringsAsFactors is set explicitly: its default changed from TRUE to FALSE
# in R 4.0.0, and the factor-style summaries recorded in this file (level
# counts for Package, Item, Title, ...) are only produced when it is TRUE.
datasets <- read.csv(
  file = "http://vincentarelbundock.github.io/Rdatasets/datasets.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

My RPubs link is http://rpubs.com/Zchen116/455491