Use the data from the Top Grossing Movies of 2014; the dataset contains the top 50 films. # http://www.the-numbers.com/market/2014/top-grossing-movies
library(graphics)
# Exploratory scatterplots of tickets sold (x) against gross sales (y).
# The same relationship is redrawn with both columns divided by a
# progressively larger constant. No xlab/ylab is supplied, so the default
# axis labels are derived from the argument expressions themselves.
plot(movies$Tickets_Sold, movies$Gross_Sales)
# Both columns divided by 1,000.
plot(movies$Tickets_Sold/1000, movies$Gross_Sales/1000)
# Both columns divided by 100,000.
plot(movies$Tickets_Sold/100000, movies$Gross_Sales/100000)
# Both columns divided by 1,000,000, i.e. expressed in millions.
plot(movies$Tickets_Sold/1000000, movies$Gross_Sales/1000000)
Refine the scatterplot
# Store both measures in millions as new columns on `movies`.
movies$Tickets_Sold_Millions <- movies$Tickets_Sold / 1e6
movies$Gross_Sales_Millions <- movies$Gross_Sales / 1e6

# Labelled scatterplot of the rescaled columns, drawn without a frame.
with(
  movies,
  plot(
    x = Tickets_Sold_Millions,
    y = Gross_Sales_Millions,
    col = "#4cbea3",
    frame.plot = FALSE,
    xlab = "Number of tickets sold (in millions)",
    ylab = "Gross ticket sales (in millions of $USD)",
    main = "Number of tickets sold and gross ticket sales"
  )
)

# Correlation between the two rescaled columns (printed when run at top level).
with(movies, cor(Tickets_Sold_Millions, Gross_Sales_Millions))
## [1] 1
# Note: the coefficient of 1 recorded above indicates a perfect positive
# correlation between tickets sold and gross sales, which is to be expected.

# Same plot again, with type = "b" (points joined by lines) and las = 2
# (axis tick labels perpendicular to their axis).
with(
  movies,
  plot(
    x = Tickets_Sold_Millions,
    y = Gross_Sales_Millions,
    type = "b",
    las = 2,
    col = "#4cbea3",
    frame.plot = FALSE,
    xlab = "Number of tickets sold (in millions)",
    ylab = "Gross ticket sales (in millions of $USD)",
    main = "Number of tickets sold and gross ticket sales"
  )
)
# Vertical boxplot of gross sales (in millions), drawn without a frame.
boxplot(
  x = movies[["Gross_Sales_Millions"]],
  col = "#4cbea3",
  frame.plot = FALSE,
  main = "Top 50 Grossing Films of 2014 (in millions of $USD)"
)

# The same data as a horizontal boxplot, with a slightly different title.
boxplot(
  x = movies[["Gross_Sales_Millions"]],
  col = "#4cbea3",
  frame.plot = FALSE,
  main = "Top 50 Grossing Films of 2014, Sales (in millions of USD $)",
  horizontal = TRUE
)
# Encode genre as a factor and as its integer codes; both columns are kept on
# `movies` for later use.
movies$Genre_factor <- as.factor(movies$Genre)
movies$Genre_numeric <- as.numeric(movies$Genre_factor)

# Frequency of films per genre.
# The codes are integers, and hist()'s default breaks fall on integers too, so
# the two lowest codes can be pooled into the first (closed) bin. Centring one
# unit-wide bin on each code gives every genre its own bar.
genre_levels <- levels(movies$Genre_factor)
hist(
  movies$Genre_numeric,
  breaks = seq(0.5, length(genre_levels) + 0.5, by = 1),
  main = "Frequencies of top 50 movies by genre",
  xlab = "Genre category",
  col = "#4cbea3",
  border = "#FFFFFF",
  labels = TRUE,
  xaxt = "n" # suppress the numeric axis; genre names are added below
)
# Label each bar with its genre name instead of the bare integer code.
axis(
  side = 1,
  at = seq_along(genre_levels),
  labels = genre_levels,
  cex.axis = 0.5
)
# Distribution of gross sales (in millions) using the default binning.
hist(
  movies[["Gross_Sales_Millions"]],
  col = "#4cbea3",
  border = "#FFFFFF",
  xlab = "Gross Sales",
  main = "Frequencies of top 50 movies by gross sales in millions of USD$"
)

# Same histogram, asking for roughly 10 bins.
hist(
  movies[["Gross_Sales_Millions"]],
  breaks = 10,
  col = "#4cbea3",
  border = "#FFFFFF",
  xlab = "Gross Sales",
  main = "Frequencies of top 50 movies by gross sales in millions of USD$"
)

# Distribution of tickets sold (in millions) using the default binning.
hist(
  movies[["Tickets_Sold_Millions"]],
  col = "#4cbea3",
  border = "#FFFFFF",
  xlab = "Tickets sold (millions)",
  main = "Number of tickets sold for the top 50 grossing films of 2014"
)

# Finer binning: ask for roughly 15 bins.
hist(
  movies[["Tickets_Sold_Millions"]],
  breaks = 15,
  col = "#4cbea3",
  border = "#FFFFFF",
  xlab = "Tickets sold (millions)",
  main = "Number of tickets sold for the top 50 grossing films of 2014"
)

# Same as above, with the frequency count printed on top of each bar.
hist(
  movies[["Tickets_Sold_Millions"]],
  breaks = 15,
  labels = TRUE,
  col = "#4cbea3",
  border = "#FFFFFF",
  xlab = "Tickets sold (millions)",
  main = "Number of tickets sold for the top 50 grossing films of 2014"
)
Need to plot the table of movie genres as proportions of a whole
# Bar chart of each genre's share of the films: counts from table() are
# converted to proportions by prop.table() before plotting.
barplot(
  height = prop.table(table(movies$Genre)),
  border = "#FFFFFF",
  col = "#4cbea3"
)
Change the font size of the labels for the x-axis and add the label names
# Count the films in each genre; printing the table shows the category order
# that the bar labels follow.
genre_counts <- table(movies$Genre)
genre_counts
##
## Action Adventure Black Comedy Comedy
## 9 9 3 7
## Drama Horror Romantic Comedy Thriller/Suspense
## 16 2 2 2

# Derive the bar labels from the table itself so they always match the bars in
# number, order and spelling (the hand-typed vector previously misspelled
# "Thriller/Suspense"). Only the one long label is abbreviated.
genre_labels <- names(genre_counts)
genre_labels[genre_labels == "Romantic Comedy"] <- "Rom Com"

# Proportion of films per genre, with shrunken x-axis labels so all of them fit.
barplot(
  prop.table(genre_counts),
  names.arg = genre_labels,
  cex.names = 0.5,
  col = "#4cbea3",
  border = "#FFFFFF"
)
library(car)
# Scatterplot of tickets sold (x) against gross sales (y), both in millions,
# drawn with scatterplot() instead of base plot().
scatterplot(
  x = movies$Tickets_Sold_Millions,
  y = movies$Gross_Sales_Millions,
  main = "Gross X Tickets",
  xlab = "Tickets",
  ylab = "Gross",
  col = "#4cbea3"
)
require(ggvis)
## Loading required package: ggvis
# ggvis scatterplot of tickets sold against gross sales (both in millions).
# Columns are referenced by bare name inside the formulas: they are looked up
# in the piped-in data, so repeating `movies$` is unnecessary and would leak
# into the axis titles.
# NOTE(review): fill = "Movie" passes the literal string "Movie", not a column.
# If per-film colouring was intended this presumably should be fill = ~Movie;
# confirm that `movies` has such a column before changing it.
movies %>%
  ggvis(x = ~Tickets_Sold_Millions, y = ~Gross_Sales_Millions) %>%
  layer_points(fill = "Movie")
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:ggvis':
##
## resolution
# Basic ggplot2 scatterplot of tickets sold against gross sales (in millions).
# aes() looks column names up in the `data` argument, so they are given bare;
# writing `movies$...` inside aes() bypasses that lookup and ends up verbatim
# in the default axis titles.
ggplot(movies, aes(x = Tickets_Sold_Millions, y = Gross_Sales_Millions)) +
  geom_point(colour = "#4cbea3")
library(ggplot2)
library(ggthemes)
# Scatterplot with the Economist theme from ggthemes plus a title, subtitle,
# caption and axis labels.
# Columns are referenced by bare name inside aes() so they are resolved from
# the `data` argument rather than from the global `movies` object.
ggplot(movies, aes(x = Tickets_Sold_Millions, y = Gross_Sales_Millions)) +
  geom_point() +
  theme_economist() +
  labs(
    title = "Tickets sold and gross earnings",
    subtitle = "sub title here",
    caption = "Data from the-numbers.com",
    x = "Tickets sold in millions",
    y = "Gross in millions"
  )
library(ggplot2)
library(ggthemes)
# Same labelled scatterplot, styled with the FiveThirtyEight theme from
# ggthemes. Columns are referenced by bare name inside aes() so they are
# resolved from the `data` argument rather than from the global `movies` object.
ggplot(movies, aes(x = Tickets_Sold_Millions, y = Gross_Sales_Millions)) +
  geom_point() +
  labs(
    title = "Tickets sold and gross earnings",
    subtitle = "sub title here",
    caption = "Data from the-numbers.com",
    x = "Tickets sold in millions",
    y = "Gross in millions"
  ) +
  theme_fivethirtyeight()
# With no `plot` argument, ggsave() writes the most recent ggplot to the file.
ggsave("myspecialplot.pdf")
## Saving 7 x 5 in image
# Keep only taxi trips with exactly 3 passengers and a distance of at least 20.
# subset() evaluates the condition inside `taxi`, so columns are named bare.
taxisub <- subset(taxi, passenger_count == 3 & trip_distance >= 20)

# library() stops with an error if a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less clear message.
library(ggplot2)
library(ggthemes)

# Scatterplot of trip distance against fare amount for the subset, using the
# Excel theme from ggthemes. Columns are given bare so aes() resolves them
# from the `data` argument.
ggplot(taxisub, aes(x = trip_distance, y = fare_amount)) +
  geom_point() +
  theme_excel()