# From the list of data sets at http://vincentarelbundock.github.io/Rdatasets/, I chose the Ice Cream Consumption data set because I love ice cream.
# Download icecream.csv into the current working directory and read it in.
# The destination path is built once with file.path() and reused for the
# read, so the download and read.csv() always refer to the same file.
directory <- getwd()
icecream_url <- paste0(
  "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/",
  "master/csv/Ecdat/Icecream.csv"
)
icecream_path <- file.path(directory, "icecream.csv")
download.file(url = icecream_url, destfile = icecream_path)
icecream <- read.csv(icecream_path)
# Show the first rows to confirm the file was read correctly
head(icecream)
## X cons income price temp
## 1 1 0.386 78 0.270 41
## 2 2 0.374 79 0.282 56
## 3 3 0.393 81 0.277 63
## 4 4 0.425 80 0.280 68
## 5 5 0.406 76 0.272 69
## 6 6 0.344 78 0.262 65
# Use the summary function to gain an overview of the data set: for each
# column it reports the minimum, 1st quartile, median, mean, 3rd quartile,
# and maximum (see the output below).
summary(icecream)
## X cons income price
## Min. : 1.00 Min. :0.2560 Min. :76.00 Min. :0.2600
## 1st Qu.: 8.25 1st Qu.:0.3113 1st Qu.:79.25 1st Qu.:0.2685
## Median :15.50 Median :0.3515 Median :83.50 Median :0.2770
## Mean :15.50 Mean :0.3594 Mean :84.60 Mean :0.2753
## 3rd Qu.:22.75 3rd Qu.:0.3912 3rd Qu.:89.25 3rd Qu.:0.2815
## Max. :30.00 Max. :0.5480 Max. :96.00 Max. :0.2920
## temp
## Min. :24.00
## 1st Qu.:32.25
## Median :49.50
## Mean :49.10
## 3rd Qu.:63.75
## Max. :72.00
# Report the mean and median of two attributes: price and temp.
# Columns are extracted by exact name with [[ ]].
mean(icecream[["price"]])
## [1] 0.2753
median(icecream[["price"]])
## [1] 0.277
mean(icecream[["temp"]])
## [1] 49.1
median(icecream[["temp"]])
## [1] 49.5
# Create a new data frame from a subset of the original: only the price and
# temp columns are kept. Every row is retained, so statistics computed on
# these columns match those of the original data set.
# Selecting columns by name with `[` keeps the original column names rather
# than the deparsed "icecream.price" / "icecream.temp" names that
# data.frame(icecream$price, icecream$temp) would generate.
icecream2 <- icecream[, c("price", "temp")]
head(icecream2)
## price temp
## 1 0.270 41
## 2 0.282 56
## 3 0.277 63
## 4 0.280 68
## 5 0.272 69
## 6 0.262 65
# Give the two columns of the new data frame capitalised names
names(icecream2) <- c("Price", "Temp")
head(icecream2)
## Price Temp
## 1 0.270 41
## 2 0.282 56
## 3 0.277 63
## 4 0.280 68
## 5 0.272 69
## 6 0.262 65
# Use the summary function to create an overview of the new data frame:
# minimum, quartiles, median, mean, and maximum of Price and Temp
# (see the output below).
summary(icecream2)
## Price Temp
## Min. :0.2600 Min. :24.00
## 1st Qu.:0.2685 1st Qu.:32.25
## Median :0.2770 Median :49.50
## Mean :0.2753 Mean :49.10
## 3rd Qu.:0.2815 3rd Qu.:63.75
## Max. :0.2920 Max. :72.00
# Report the mean and median of the same two attributes in the new data
# frame so they can be compared with the values from the original.
mean(icecream2[["Price"]])
## [1] 0.2753
median(icecream2[["Price"]])
## [1] 0.277
mean(icecream2[["Temp"]])
## [1] 49.1
median(icecream2[["Temp"]])
## [1] 49.5
# The mean and median of the original data set and the subset are the same.
# The values are doubles, so they are compared with all.equal(), which
# allows for a small numeric tolerance, instead of `==`, which tests exact
# floating-point equality. isTRUE() turns the result into a single
# TRUE/FALSE.
isTRUE(all.equal(mean(icecream$price), mean(icecream2$Price)))
## [1] TRUE
isTRUE(all.equal(median(icecream$price), median(icecream2$Price)))
## [1] TRUE
isTRUE(all.equal(mean(icecream$temp), mean(icecream2$Temp)))
## [1] TRUE
isTRUE(all.equal(median(icecream$temp), median(icecream2$Temp)))
## [1] TRUE
# Change the values in one column so that every value in that column is
# changed (this covers the "at least 3 values" requirement).
# The change is made on a copy so it can be compared with the original:
# every price is multiplied by 100.
icecream3 <- icecream
icecream3[["price"]] <- 100 * icecream3[["price"]]
# Compare the original prices with the rescaled ones
head(icecream[["price"]])
## [1] 0.270 0.282 0.277 0.280 0.272 0.262
head(icecream3[["price"]])
## [1] 27.0 28.2 27.7 28.0 27.2 26.2