Load the movie ratings CSV into an R data frame

# Fetch the survey responses; the first line of the CSV holds the column names.
movie_ratings <- read.csv(
  "https://raw.githubusercontent.com/ajbentley/cuny_ms_ds/master/607/movie_ratings.csv",
  header = TRUE
)

# Store the Respondant.Name column as character strings instead of a factor.
movie_ratings[["Respondant.Name"]] <- as.character(movie_ratings[["Respondant.Name"]])
head(movie_ratings)
##   id Respondant.Name Isle.of.Dogs The.First.Purge Christopher.Robin
## 1  1            DMH             5               1                 1
## 2  2             Mea            3               1                 3
## 3  3          Ryder             1               1                 4
## 4  4         Kenneth            1               3                 1
## 5  5           Alice            2               2                 2
## 6  6            Rose            4               1                 1
##   Skyscraper Blockers A.Quiet.Place Gender         Age          US.Region
## 1          1        1             1 Female       40-49    Middle Atlantic
## 2          1        1             1 Female       30-39 West North Central
## 3          1        1             1 Female       40-49            Pacific
## 4          2        3             1   Male       40-49 East North Central
## 5          2        2             2 Female       40-49    Middle Atlantic
## 6          1        1             3 Female 60 or older East North Central

Check df dimensions and summary

# Report the size of the data frame as (rows, columns).
dim(movie_ratings)
## [1] 14 11
# Print a per-column summary: quartiles and mean for numeric columns,
# level counts for factor columns, length/class for character columns.
summary(movie_ratings)
##        id        Respondant.Name     Isle.of.Dogs   The.First.Purge
##  Min.   : 1.00   Length:14          Min.   :1.000   Min.   :1.000  
##  1st Qu.: 4.25   Class :character   1st Qu.:2.000   1st Qu.:1.000  
##  Median : 7.50   Mode  :character   Median :3.000   Median :2.500  
##  Mean   : 7.50                      Mean   :2.857   Mean   :2.214  
##  3rd Qu.:10.75                      3rd Qu.:3.750   3rd Qu.:3.000  
##  Max.   :14.00                      Max.   :5.000   Max.   :4.000  
##  Christopher.Robin   Skyscraper       Blockers     A.Quiet.Place  
##  Min.   :1.000     Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000     1st Qu.:1.000   1st Qu.:1.250   1st Qu.:1.250  
##  Median :3.000     Median :2.000   Median :2.500   Median :3.000  
##  Mean   :2.643     Mean   :2.286   Mean   :2.286   Mean   :2.714  
##  3rd Qu.:3.000     3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.750  
##  Max.   :4.000     Max.   :5.000   Max.   :4.000   Max.   :5.000  
##     Gender            Age                  US.Region
##  Female:11   30-39      : 2   East North Central:4  
##  Male  : 3   40-49      :10   Middle Atlantic   :5  
##              60 or older: 2   New England       :2  
##                               Pacific           :1  
##                               West North Central:2  
## 

Make table of ratings with average row

# Flag which columns are numeric (this picks up `id` as well as the six
# rating columns) and keep only those columns.
nums <- vapply(movie_ratings, is.numeric, logical(1))
mr_ratings_only <- movie_ratings[, nums]

head(mr_ratings_only)
##   id Isle.of.Dogs The.First.Purge Christopher.Robin Skyscraper Blockers
## 1  1            5               1                 1          1        1
## 2  2            3               1                 3          1        1
## 3  3            1               1                 4          1        1
## 4  4            1               3                 1          2        3
## 5  5            2               2                 2          2        2
## 6  6            4               1                 1          1        1
##   A.Quiet.Place
## 1             1
## 2             1
## 3             1
## 4             1
## 5             2
## 6             3
# Mean of every column of the numeric-only table, rounded to two decimals.
# colMeans() is the dedicated column-mean routine and replaces the generic
# apply(df, 2, mean) call; the result is the same named numeric vector.
# Note that the `id` column is averaged along with the ratings.
avg_rtgs <- round(colMeans(mr_ratings_only), digits = 2)
avg_rtgs
##                id      Isle.of.Dogs   The.First.Purge Christopher.Robin 
##              7.50              2.86              2.21              2.64 
##        Skyscraper          Blockers     A.Quiet.Place 
##              2.29              2.29              2.71
# Append the averages as one extra row at the bottom, keep every value at
# two decimals, and show the end of the table to confirm the new row.
mr_ratings_only <- round(rbind(mr_ratings_only, avg_rtgs), digits = 2)
tail(mr_ratings_only)
##      id Isle.of.Dogs The.First.Purge Christopher.Robin Skyscraper Blockers
## 10 10.0         3.00            3.00              3.00       3.00     3.00
## 11 11.0         5.00            4.00              3.00       1.00     2.00
## 12 12.0         1.00            1.00              4.00       4.00     2.00
## 13 13.0         2.00            2.00              2.00       5.00     4.00
## 14 14.0         4.00            3.00              4.00       3.00     3.00
## 15  7.5         2.86            2.21              2.64       2.29     2.29
##    A.Quiet.Place
## 10          3.00
## 11          4.00
## 12          4.00
## 13          4.00
## 14          5.00
## 15          2.71
# Pad the respondent table with an all-NA row so that its row count matches
# the ratings table, which now carries the extra averages row.
movie_ratings[nrow(movie_ratings) + 1, ] <- NA

# Bind the rating columns (including the averages row) on the right, then
# discard the original rating columns sitting at positions 3 to 8.
movie_ratings <- cbind(movie_ratings, mr_ratings_only)[, -(3:8)]
tail(movie_ratings)
##    id Respondant.Name Gender         Age          US.Region id.1
## 10 10           Casey Female       40-49        New England 10.0
## 11 11             Gil   Male 60 or older West North Central 11.0
## 12 12            Inae Female       40-49        New England 12.0
## 13 13            Pete   Male       40-49 East North Central 13.0
## 14 14        Kristen  Female       30-39    Middle Atlantic 14.0
## 15 NA            <NA>   <NA>        <NA>               <NA>  7.5
##    Isle.of.Dogs The.First.Purge Christopher.Robin Skyscraper Blockers
## 10         3.00            3.00              3.00       3.00     3.00
## 11         5.00            4.00              3.00       1.00     2.00
## 12         1.00            1.00              4.00       4.00     2.00
## 13         2.00            2.00              2.00       5.00     4.00
## 14         4.00            3.00              4.00       3.00     3.00
## 15         2.86            2.21              2.64       2.29     2.29
##    A.Quiet.Place
## 10          3.00
## 11          4.00
## 12          4.00
## 13          4.00
## 14          5.00
## 15          2.71
# Label the appended averages row. The cell is addressed as "last row" and by
# column name rather than the hard-coded position [15, 2], so the label still
# lands in the right place if respondents or columns are added or reordered.
movie_ratings[nrow(movie_ratings), "Respondant.Name"] <- "Average Rating"
tail(movie_ratings)
##    id Respondant.Name Gender         Age          US.Region id.1
## 10 10           Casey Female       40-49        New England 10.0
## 11 11             Gil   Male 60 or older West North Central 11.0
## 12 12            Inae Female       40-49        New England 12.0
## 13 13            Pete   Male       40-49 East North Central 13.0
## 14 14        Kristen  Female       30-39    Middle Atlantic 14.0
## 15 NA  Average Rating   <NA>        <NA>               <NA>  7.5
##    Isle.of.Dogs The.First.Purge Christopher.Robin Skyscraper Blockers
## 10         3.00            3.00              3.00       3.00     3.00
## 11         5.00            4.00              3.00       1.00     2.00
## 12         1.00            1.00              4.00       4.00     2.00
## 13         2.00            2.00              2.00       5.00     4.00
## 14         4.00            3.00              4.00       3.00     3.00
## 15         2.86            2.21              2.64       2.29     2.29
##    A.Quiet.Place
## 10          3.00
## 11          4.00
## 12          4.00
## 13          4.00
## 14          5.00
## 15          2.71

Create new DF that compares average movie ratings for men versus women

# df with only males; %in% yields FALSE (never NA) for rows whose Gender is
# NA, so such rows are left out without needing which()
males <- movie_ratings[movie_ratings$Gender %in% "Male", ]

# df with only males and only numeric columns
nums <- vapply(males, is.numeric, logical(1))
male_rtgs_only <- males[, nums]

# average male rating per column, rounded to 2 digits, under a nicer name
avg_male_rtgs <- round(colMeans(male_rtgs_only), digits = 2)
Male_Ratings <- avg_male_rtgs

# df with only females (same NA-safe filter as above)
females <- movie_ratings[movie_ratings$Gender %in% "Female", ]

# df with only females and only numeric columns
nums <- vapply(females, is.numeric, logical(1))
female_rtgs_only <- females[, nums]

# average female rating per column, rounded to 2 digits
avg_female_rtgs <- round(colMeans(female_rtgs_only), digits = 2)
avg_female_rtgs
##                id              id.1      Isle.of.Dogs   The.First.Purge 
##              7.00              7.00              2.91              2.00 
## Christopher.Robin        Skyscraper          Blockers     A.Quiet.Place 
##              2.82              2.18              2.09              2.64
Female_Ratings <- avg_female_rtgs

# combine and make comparative df; the third row (male minus female) gets an
# explicit "Difference" label instead of a blank row name, and is rounded so
# floating-point noise from the subtraction cannot leak into the table
gend_diff <- data.frame(rbind(
  Male_Ratings,
  Female_Ratings,
  Difference = round(Male_Ratings - Female_Ratings, digits = 2)
))
# drop the id columns by name rather than by position
gend_diff <- gend_diff[, !(names(gend_diff) %in% c("id", "id.1")), drop = FALSE]
gend_diff
##                Isle.of.Dogs The.First.Purge Christopher.Robin Skyscraper
## Male_Ratings           2.67               3              2.00       2.67
## Female_Ratings         2.91               2              2.82       2.18
## Difference            -0.24               1             -0.82       0.49
##                Blockers A.Quiet.Place
## Male_Ratings       3.00          3.00
## Female_Ratings     2.09          2.64
## Difference         0.91          0.36

Create graph showing male and female average ratings per movie side by side

# transpose dataframe so each movie is a row and each row of gend_diff
# becomes a column
rtg_by_gender <- as.data.frame(t(gend_diff))

# keep only the two gender columns, selected by name; this drops the
# difference column regardless of its position or label
rtg_by_gender <- rtg_by_gender[, c("Male_Ratings", "Female_Ratings")]

# add the movie names (currently the row names) as an explicit column
movie <- rownames(rtg_by_gender)
rtg_by_gender <- cbind(movie, rtg_by_gender)
rtg_by_gender
##                               movie Male_Ratings Female_Ratings
## Isle.of.Dogs           Isle.of.Dogs         2.67           2.91
## The.First.Purge     The.First.Purge         3.00           2.00
## Christopher.Robin Christopher.Robin         2.00           2.82
## Skyscraper               Skyscraper         2.67           2.18
## Blockers                   Blockers         3.00           2.09
## A.Quiet.Place         A.Quiet.Place         3.00           2.64
# convert to format needed to graph 2 variables: long format with one row per
# movie/gender pair, the gender label in `variable` and the rating in `value`
# NOTE(review): melt() is not base R (presumably reshape2) and no library()
# call is visible in this document; confirm the package is loaded beforehand
rbg_melt <- melt(rtg_by_gender, id.vars='movie')
rbg_melt
##                movie       variable value
## 1       Isle.of.Dogs   Male_Ratings  2.67
## 2    The.First.Purge   Male_Ratings  3.00
## 3  Christopher.Robin   Male_Ratings  2.00
## 4         Skyscraper   Male_Ratings  2.67
## 5           Blockers   Male_Ratings  3.00
## 6      A.Quiet.Place   Male_Ratings  3.00
## 7       Isle.of.Dogs Female_Ratings  2.91
## 8    The.First.Purge Female_Ratings  2.00
## 9  Christopher.Robin Female_Ratings  2.82
## 10        Skyscraper Female_Ratings  2.18
## 11          Blockers Female_Ratings  2.09
## 12     A.Quiet.Place Female_Ratings  2.64
# build a grouped bar chart: for each movie, one bar per gender side by side
f <- ggplot(
  data = rbg_melt,
  mapping = aes(x = movie, y = value, fill = variable)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Movie Ratings By Gender",
    x = "Movies",
    y = "Average Rating"
  )

# display the chart
f