#createbigcitycsv.R

# The `bigcity` dataset used below is provided by the 'boot' package.
# Uncomment the next line once if the package is not installed yet.
# install.packages("boot")
library(boot)

# Build a two-column data frame from the `u` and `x` columns of `bigcity`.
# (The report messages further down treat `u` as the 1920 figures and `x`
# as the 1930 figures.)
population <- data.frame(bigcity$u, bigcity$x)
# View() opens a spreadsheet-style viewer, so only call it in an
# interactive session; a batch run (e.g. Rscript) has nothing to show it in.
if (interactive()) {
  View(population)
}

# Replace the auto-generated column names with descriptive ones.
colnames(population) <- c("TwentyPop", "ThirtyPop")
if (interactive()) {
  View(population)
}

# Use the summary function to gain an overview of the dataset.
# print() makes the overview visible even when the script is run via source().
print(summary(population))

# Mean and median of TwentyPop
Mean20pop <- mean(population$TwentyPop)
Median20pop <- median(population$TwentyPop)

# Mean and median of ThirtyPop
Mean30pop <- mean(population$ThirtyPop)
Median30pop <- median(population$ThirtyPop)

# Display the mean and median for the above two columns, rounded to two
# decimal places.
print(paste0(
  "The mean population of the randomly selected 49 US cities in 1920 is:",
  round(Mean20pop, 2),
  " and the median population for the same dataset is: ",
  round(Median20pop, 2)
))

print(paste0(
  "The mean population of the randomly selected 49 US cities in 1930 is:",
  round(Mean30pop, 2),
  " and the median population for the same dataset is: ",
  round(Median30pop, 2)
))

# Create a new data frame holding a subset of the rows: keep only rows where
# TwentyPop is above 100 and ThirtyPop is below 150. `&` is the elementwise
# AND, which is what a row filter needs.
subsetpop <- subset(population, TwentyPop > 100 & ThirtyPop < 150)

# Create new column names for the new data frame.
colnames(subsetpop) <- c("subsetcol1", "subsetcol2")

# Mean and median of the subset's 1920 population
Meansubset20pop <- mean(subsetpop$subsetcol1)
Mediansubset20pop <- median(subsetpop$subsetcol1)

# Mean and median of the subset's 1930 population
Meansubset30pop <- mean(subsetpop$subsetcol2)
Mediansubset30pop <- median(subsetpop$subsetcol2)

# Use the summary function to create an overview of the new data frame, then
# print the mean and median for the same two attributes.
# print() makes the overview visible even when the script is run via source().
print(summary(subsetpop))

print(paste0(
  "The mean population of the subset in 1920 is:",
  round(Meansubset20pop, 2),
  " and the median population for the same dataset is: ",
  round(Mediansubset20pop, 2)
))

print(paste0(
  "The mean population of the subset in 1930 is:",
  round(Meansubset30pop, 2),
  " and the median population for the same dataset is: ",
  round(Mediansubset30pop, 2)
))

# Compare the statistics of the original dataset with those of the subset,
# one line per statistic and year, each rounded to two decimal places.
print(paste0(
  "The mean population of the original dataset in 1920 is:",
  round(Mean20pop, 2),
  " versus",
  " the mean population for the subset which is: ",
  round(Meansubset20pop, 2)
))

print(paste0(
  "The mean population of the original dataset in 1930 is:",
  round(Mean30pop, 2),
  " versus",
  " the mean population for the subset which is: ",
  round(Meansubset30pop, 2)
))

print(paste0(
  "The median population of the original dataset in 1920 is:",
  round(Median20pop, 2),
  " versus",
  " the median population for the subset which is: ",
  round(Mediansubset20pop, 2)
))

print(paste0(
  "The median population of the original dataset in 1930 is:",
  round(Median30pop, 2),
  " versus",
  " the median population for the subset which is: ",
  round(Mediansubset30pop, 2)
))

# Rename at least 3 values in a column.
# The column is converted to character first so that text labels can replace
# individual values; after this step TwentyPop is no longer numeric.
population$TwentyPop <- as.character(population$TwentyPop)
population$TwentyPop[population$TwentyPop == "43"] <- "replace1"
population$TwentyPop[population$TwentyPop == "36"] <- "replace2"
population$TwentyPop[population$TwentyPop == "50"] <- "replace3"