# Formative Assessment

# Problem A ----
# Per-state summary statistics of `poptotal` in the `midwest` data set.

# Attach the tidyverse before the first pipeline: the code below needs dplyr's
# verbs, the `%>%` pipe, and the ggplot2 data sets (`midwest`, `diamonds`).
library(tidyverse)

midwest %>%
  group_by(state) %>%
  summarize(
    poptotalmean = mean(poptotal), # mean poptotal within each state
    poptotalmed = median(poptotal), # median poptotal within each state
    popmax = max(poptotal), # largest poptotal within each state
    popmin = min(poptotal), # smallest poptotal within each state
    popdistinct = n_distinct(poptotal), # number of unique poptotal values
    popfirst = first(poptotal), # first poptotal value in each state's rows
    # any() yields a single TRUE/FALSE per state, not a count:
    popany = any(poptotal < 5000), # TRUE if some row is below 5,000
    popany2 = any(poptotal > 2000000) # TRUE if some row is above 2,000,000
  )

# Problem B ----
# Per-state counts. Summing a logical vector counts its TRUE values.
midwest %>%
  group_by(state) %>%
  summarize(
    num5k = sum(poptotal < 5000), # rows with poptotal below 5,000
    num2mil = sum(poptotal > 2000000), # rows with poptotal above 2,000,000
    numrows = n() # number of rows (entries) for each state
  )

# Problem C ----
# Part 1: for each county name, count the distinct states it appears in,
# then list the names shared by the most states first.
midwest %>%
  group_by(county) %>% # one group per county name
  summarize(x = n_distinct(state)) %>% # unique states per county name
  arrange(desc(x)) # sort by that count, descending

# Part 2: number of rows recorded under each county name.
midwest %>%
  group_by(county) %>% # one group per county name
  summarize(x = n()) # row count within each group

# Part 3: number of distinct county values inside each county group.
# Every group holds a single county value by construction, so x is always 1.
midwest %>%
  group_by(county) %>% # one group per county name
  summarize(x = n_distinct(county)) # unique county values per group

# Problem D ----
# Per-clarity counts in the `diamonds` data set.
diamonds %>%
  group_by(clarity) %>% # one group per clarity level
  summarize(
    a = n_distinct(color), # number of distinct colors per clarity level
    b = n_distinct(price), # number of distinct prices per clarity level
    c = n() # number of rows (diamonds) per clarity level
  )

# Problem E ----
# Part 1: price mean and standard deviation for each color/cut combination.
diamonds %>%
  group_by(color, cut) %>%
  summarize(
    m = mean(price), # mean price per color/cut combination
    s = sd(price) # standard deviation of price per color/cut combination
  )

# Part 2: same statistics as part 1, with the grouping variables listed in
# the opposite order (cut first, then color).
diamonds %>%
  group_by(cut, color) %>%
  summarize(
    m = mean(price), # mean price per cut/color combination
    s = sd(price) # standard deviation of price per cut/color combination
  )

# Part 3: price statistics for each cut/color/clarity combination, plus a
# sale price derived from the mean.
diamonds %>%
  group_by(cut, color, clarity) %>%
  summarize(
    m = mean(price), # mean price per cut/color/clarity combination
    s = sd(price), # standard deviation of price per combination
    msale = m * 0.80 # sale price: 80% of the mean price computed above
  )

# Problem F ----
# Per-cut summaries. The food names are just the output column names.
diamonds %>%
  group_by(cut) %>%
  summarize(
    potato = mean(depth), # mean depth for each cut
    pizza = mean(price), # mean price for each cut
    popcorn = median(y), # median of the `y` column for each cut
    pineapple = potato - pizza, # mean depth minus mean price
    papaya = pineapple^2, # square of that difference
    peach = n() # number of rows (diamonds) for each cut
  )

# Problem G ----
# Part 1: mean price per color, then two extra columns on the summary table.
diamonds %>%
  group_by(color) %>%
  summarize(m = mean(price)) %>% # mean price for each color
  mutate(
    x1 = str_c("Diamond color", color), # label text joined to the color value
    x2 = 5 # constant column: 5 in every row
  )

# Part 2: same as part 1, with an explicit ungroup() before mutate().
diamonds %>%
  group_by(color) %>%
  summarize(m = mean(price)) %>% # mean price for each color
  ungroup() %>% # remove any grouping before adding columns
  mutate(
    x1 = str_c("Diamond color", color), # label text joined to the color value
    x2 = 5 # constant column: 5 in every row
  )

# Problem H ----
# Part 1: halve each price, then average the halved price within each color.
diamonds %>%
  group_by(color) %>% # one group per color
  mutate(x1 = price * 0.5) %>% # new column: half of the price
  summarize(m = mean(x1)) %>% # mean of x1 for each color
  ungroup() # remove the grouping

# Part 2
# What's the difference between part I and II?
# Here ungroup() runs before summarize(), so the mean is taken over all rows
# and the result is a single value rather than one value per color.
diamonds %>%
  group_by(color) %>% # one group per color
  mutate(x1 = price * 0.5) %>% # new column: half of the price
  ungroup() %>% # remove the grouping before summarising
  summarize(m = mean(x1)) # overall mean of x1 across every row

library(tidyverse)

# Formative Assessment

# Manan Mensah

# 2024-10-11