# Load the BankWages data set directly from its hosted CSV URL.
# read.csv() already defaults to header = TRUE and sep = ",", so only the
# location needs to be supplied.
data <- "https://vincentarelbundock.github.io/Rdatasets/csv/AER/BankWages.csv"
bwages <- read.csv(data)

# Preview the first six rows.
head(bwages, n = 6L)
##   X    job education gender minority
## 1 1 manage        15   male       no
## 2 2  admin        16   male       no
## 3 3  admin        12 female       no
## 4 4  admin         8 female       no
## 5 5  admin        15   male       no
## 6 6  admin        15   male       no

Question 1 Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.

# Per-column overview of the full data set: quartiles and mean for numeric
# columns, length/class/mode for character columns.
summary(bwages)
##        X             job              education        gender         
##  Min.   :  1.0   Length:474         Min.   : 8.00   Length:474        
##  1st Qu.:119.2   Class :character   1st Qu.:12.00   Class :character  
##  Median :237.5   Mode  :character   Median :12.00   Mode  :character  
##  Mean   :237.5                      Mean   :13.49                     
##  3rd Qu.:355.8                      3rd Qu.:15.00                     
##  Max.   :474.0                      Max.   :21.00                     
##    minority        
##  Length:474        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Mean and median of two numeric columns of the full data set.
# NOTE(review): X looks like a row identifier (1..474) rather than a
# measured attribute -- confirm it is a meaningful choice here.
mean(bwages[["X"]])
## [1] 237.5
median(bwages[["X"]])
## [1] 237.5
mean(bwages[["education"]])
## [1] 13.49156
median(bwages[["education"]])
## [1] 12

Question 2 Create a new data frame with a subset of the columns and rows. Make sure to rename it.

# Build a smaller data frame: rows 10 through 20, keeping only the X and
# education columns. Plain `[` indexing already returns the subset; the
# previous subset() wrapper had no condition and therefore did nothing.
# Columns are selected by name so the result does not depend on column order.
bwages2 <- bwages[10:20, c("X", "education")]

# Preview the first six rows of the new data frame.
head(bwages2)
##     X education
## 10 10        12
## 11 11        16
## 12 12         8
## 13 13        15
## 14 14        15
## 15 15        12

Question 3 Create new column names for the new data frame

# Give both columns of the subset new names, then show the whole data frame.
names(bwages2) <- c("Col1", "Col2")
print(bwages2)
##    Col1 Col2
## 10   10   12
## 11   11   16
## 12   12    8
## 13   13   15
## 14   14   15
## 15   15   12
## 16   16   12
## 17   17   15
## 18   18   16
## 19   19   12
## 20   20   12

Question 4 Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare

# Per-column overview of the subset, for comparison with the full data set.
summary(bwages2)
##       Col1           Col2      
##  Min.   :10.0   Min.   : 8.00  
##  1st Qu.:12.5   1st Qu.:12.00  
##  Median :15.0   Median :12.00  
##  Mean   :15.0   Mean   :13.18  
##  3rd Qu.:17.5   3rd Qu.:15.00  
##  Max.   :20.0   Max.   :16.00
# Mean and median of the same two attributes, now taken from the subset
# (Col1 was X, Col2 was education).
mean(bwages2[["Col1"]])
## [1] 15
median(bwages2[["Col1"]])
## [1] 15
mean(bwages2[["Col2"]])
## [1] 13.18182
median(bwages2[["Col2"]])
## [1] 12

Question 5 For at least 3 values in a column, please rename them so that every value is renamed.

# Relabel every value in Col1 by prefixing it with "ID-" (e.g. 10 -> "ID-10").
# This turns the column into character strings.
bwages2[["Col1"]] <- paste0("ID-", bwages2[["Col1"]])
bwages2
##     Col1 Col2
## 10 ID-10   12
## 11 ID-11   16
## 12 ID-12    8
## 13 ID-13   15
## 14 ID-14   15
## 15 ID-15   12
## 16 ID-16   12
## 17 ID-17   15
## 18 ID-18   16
## 19 ID-19   12
## 20 ID-20   12

Question 6 Display enough rows to see examples of all of steps 1-5 above.

# Show the first six rows of the final data frame, which reflect the
# subsetting, the new column names, and the relabelled Col1 values.
head(bwages2)
##     Col1 Col2
## 10 ID-10   12
## 11 ID-11   16
## 12 ID-12    8
## 13 ID-13   15
## 14 ID-14   15
## 15 ID-15   12

Question 7 BONUS – place the original .csv in a GitHub file and have R read from the link.

# Read the CSV straight from its web link rather than from a local file.
# read.csv() already defaults to header = TRUE and sep = ",".
data <- "https://vincentarelbundock.github.io/Rdatasets/csv/AER/BankWages.csv"
bwages <- read.csv(data)

# Preview the first six rows.
head(bwages, n = 6L)