# Problem A ----
# The dplyr verbs, the pipe, and the `midwest` data used below come from the
# tidyverse, so it has to be attached before the first pipeline runs.
library(tidyverse)

# Per-state summary statistics of `poptotal`. Each statistic is computed over
# all rows of `midwest` that share the same `state`.
midwest %>%
  group_by(state) %>%
  summarize(
    poptotalmean = mean(poptotal),        # mean of poptotal within the state
    poptotalmed = median(poptotal),       # median of poptotal within the state
    popmax = max(poptotal),               # largest poptotal within the state
    popmin = min(poptotal),               # smallest poptotal within the state
    popdistinct = n_distinct(poptotal),   # number of unique poptotal values
    popfirst = first(poptotal),           # poptotal of the state's first row
    popany = any(poptotal < 5000),        # TRUE if any row is below 5,000
    popany2 = any(poptotal > 2000000)     # TRUE if any row is above 2,000,000
  )
# Problem B ----
# Per-state counts. Summing a logical vector counts its TRUE values, so
# `sum(poptotal < 5000)` is the number of rows meeting the condition.
midwest %>%
  group_by(state) %>%
  summarize(
    num5k = sum(poptotal < 5000),        # rows in the state with poptotal < 5,000
    num2mil = sum(poptotal > 2000000),   # rows in the state with poptotal > 2,000,000
    numrows = n()                        # total number of rows for the state
  )
# Problem C ----

# Part 1: for each county name, count how many different states it appears
# in, then list the most widely shared names first.
midwest %>%
  group_by(county) %>%
  summarize(x = n_distinct(state)) %>%   # distinct states per county name
  arrange(desc(x))                       # largest counts at the top

# Part 2: for each county name, count how many rows carry that name.
midwest %>%
  group_by(county) %>%
  summarize(x = n())                     # rows per county name

# Part 3: count the distinct county names inside each county group. Every
# group holds exactly one county name by construction, so x is always 1.
midwest %>%
  group_by(county) %>%
  summarize(x = n_distinct(county))
# Problem D ----
# One row per clarity level of `diamonds`.
diamonds %>%
  group_by(clarity) %>%
  summarize(
    a = n_distinct(color),   # number of different colors at this clarity
    b = n_distinct(price),   # number of different prices at this clarity
    c = n()                  # number of rows (diamonds) at this clarity
  )
# Problem E ----
# With several grouping variables, summarize() peels off only the last one,
# so each result below is still grouped by the remaining variable(s).

# Part 1: mean and standard deviation of price for each (color, cut) pair;
# the output is ordered by color first, then cut.
diamonds %>%
  group_by(color, cut) %>%
  summarize(
    m = mean(price),   # mean price within the color/cut group
    s = sd(price)      # standard deviation of price within the group
  )

# Part 2: the same statistics with the grouping order swapped, so the output
# is ordered by cut first, then color.
diamonds %>%
  group_by(cut, color) %>%
  summarize(
    m = mean(price),   # mean price within the cut/color group
    s = sd(price)      # standard deviation of price within the group
  )

# Part 3: statistics for each (cut, color, clarity) combination, plus a sale
# price derived from the mean computed earlier in the same summarize() call.
diamonds %>%
  group_by(cut, color, clarity) %>%
  summarize(
    m = mean(price),     # mean price within the cut/color/clarity group
    s = sd(price),       # standard deviation of price within the group
    msale = m * 0.80     # sale price: 80% of the group's mean price
  )
# Problem F ----
# The food names are just the output column names. Later columns can refer to
# columns created earlier in the same summarize() call.
diamonds %>%
  group_by(cut) %>%
  summarize(
    potato = mean(depth),          # mean depth for the cut
    pizza = mean(price),           # mean price for the cut
    popcorn = median(y),           # median of the `y` variable for the cut
    pineapple = potato - pizza,    # mean depth minus mean price
    papaya = pineapple ^ 2,        # square of that difference
    peach = n()                    # number of diamonds with this cut
  )
# Problem G ----

# Part 1: mean price per color, then two extra columns on the summary table.
diamonds %>%
  group_by(color) %>%
  summarize(m = mean(price)) %>%   # mean price of the diamonds of each color
  mutate(
    x1 = str_c("Diamond color", color),   # label text joined to the color value
    x2 = 5                                # constant column
  )

# Part 2: same as part 1 with an explicit ungroup(). summarize() already
# removed the single grouping variable, so ungroup() changes nothing here.
diamonds %>%
  group_by(color) %>%
  summarize(m = mean(price)) %>%   # mean price of the diamonds of each color
  ungroup() %>%
  mutate(
    x1 = str_c("Diamond color", color),   # label text joined to the color value
    x2 = 5                                # constant column
  )
# Problem H ----
# What is the difference between part 1 and part 2?

# Part 1: the data are still grouped by color when summarize() runs, so the
# result has one row per color holding the mean half-price for that color.
diamonds %>%
  group_by(color) %>%
  mutate(x1 = price * 0.5) %>%     # new column: half of each diamond's price
  summarize(m = mean(x1)) %>%      # mean half-price within each color
  ungroup()                        # no grouping is left; kept for explicitness

# Part 2: the grouping is removed before summarize(), so the result is a
# single row holding the mean half-price over all diamonds.
diamonds %>%
  group_by(color) %>%              # group by color (has no effect on x1 below)
  mutate(x1 = price * 0.5) %>%     # new column: half of each diamond's price
  ungroup() %>%                    # drop the grouping before summarising
  summarize(m = mean(x1))          # overall mean half-price, not per color
library(tidyverse)