#createbigcitycsv.R
# Load the boot package, which supplies the `bigcity` data set used below.
# Uncomment the next line once if the package is not installed yet.
# install.packages("boot")
library(boot)

# Build a two-column data frame from the `u` and `x` columns of `bigcity`.
population <- data.frame(bigcity$u, bigcity$x)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(population)
}

# Give the columns descriptive names.
colnames(population) <- c("TwentyPop", "ThirtyPop")
if (interactive()) {
  View(population)
}

# Use the summary function to gain an overview of the dataset.
# print() is explicit so the overview also appears when the script is source()d.
print(summary(population))
# Mean and median of TwentyPop.
Mean20pop <- mean(population$TwentyPop)
Median20pop <- median(population$TwentyPop)

# Mean and median of ThirtyPop.
Mean30pop <- mean(population$ThirtyPop)
Median30pop <- median(population$ThirtyPop)

# Display the mean and median for the above two columns, rounded to 2 decimals.
print(paste0(
  "The mean population of the randomly selected 49 US cities in 1920 is:",
  round(Mean20pop, 2),
  " and the median population for the same dataset is: ",
  round(Median20pop, 2)
))
print(paste0(
  "The mean population of the randomly selected 49 US cities in 1930 is:",
  round(Mean30pop, 2),
  " and the median population for the same dataset is: ",
  round(Median30pop, 2)
))
# Create a new data frame with a subset of the rows: keep only the rows where
# TwentyPop is above 100 and ThirtyPop is below 150.
subsetpop <- subset(population, TwentyPop > 100 & ThirtyPop < 150)

# Create new column names for the new data frame.
# subsetcol1 holds the TwentyPop values, subsetcol2 the ThirtyPop values.
colnames(subsetpop) <- c("subsetcol1", "subsetcol2")

# Mean and median of the subset's 1920s population (subsetcol1).
Meansubset20pop <- mean(subsetpop$subsetcol1)
Mediansubset20pop <- median(subsetpop$subsetcol1)

# Mean and median of the subset's 1930s population (subsetcol2).
Meansubset30pop <- mean(subsetpop$subsetcol2)
Mediansubset30pop <- median(subsetpop$subsetcol2)
# Use the summary function to create an overview of the new data frame, then
# print the mean and median for the same two attributes.
# print() is explicit so the overview also appears when the script is source()d.
print(summary(subsetpop))
print(paste0(
  "The mean population of the subset in 1920 is:",
  round(Meansubset20pop, 2),
  " and the median population for the same dataset is: ",
  round(Mediansubset20pop, 2)
))
print(paste0(
  "The mean population of the subset in 1930 is:",
  round(Meansubset30pop, 2),
  " and the median population for the same dataset is: ",
  round(Mediansubset30pop, 2)
))

# Compare the original dataset's statistics against the subset's.
print(paste0(
  "The mean population of the original dataset in 1920 is:",
  round(Mean20pop, 2),
  " versus",
  " the mean population for the subset which is: ",
  round(Meansubset20pop, 2)
))
print(paste0(
  "The mean population of the original dataset in 1930 is:",
  round(Mean30pop, 2),
  " versus",
  " the mean population for the subset which is: ",
  round(Meansubset30pop, 2)
))
print(paste0(
  "The median population of the original dataset in 1920 is:",
  round(Median20pop, 2),
  " versus",
  " the median population for the subset which is: ",
  round(Mediansubset20pop, 2)
))
print(paste0(
  "The median population of the original dataset in 1930 is:",
  round(Median30pop, 2),
  " versus",
  " the median population for the subset which is: ",
  round(Mediansubset30pop, 2)
))
# Rename at least 3 values in a column.
# The column is converted to character first so it can hold text labels; from
# this point on TwentyPop is no longer numeric.
population$TwentyPop <- as.character(population$TwentyPop)
population$TwentyPop[population$TwentyPop == "43"] <- "replace1"
population$TwentyPop[population$TwentyPop == "36"] <- "replace2"
population$TwentyPop[population$TwentyPop == "50"] <- "replace3"