install.packages("sqldf",repos = "http://cran.us.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/4z/9kr7k7jd1q7gc792xjkrwf040000gn/T//Rtmp77qfg8/downloaded_packages
library(sqldf) #sql package for doing sql queris
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
install.packages("dplyr",repos = "http://cran.us.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/4z/9kr7k7jd1q7gc792xjkrwf040000gn/T//Rtmp77qfg8/downloaded_packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
setwd("/Users/janakiramsundaraneedi/Desktop/sqlmidterm")
IMDB<-read.csv("final2.csv")
names(IMDB)
## [1] "X" "length" "name" "title" "year"
## [6] "genre" "studioname" "zipcode"
dim(IMDB)
## [1] 149 8
movies<-select(IMDB, title, year, length, genre, studioname)
dim(movies)
## [1] 149 5
names(movies)
## [1] "title" "year" "length" "genre" "studioname"
head(movies)
## title year length genre
## 1 Spectre\xeb_ 2015 148 Adventure
## 2 Star Wars: Episode VII - The Force Awakens\xeb_ NA NA Adventure
## 3 Jupiter Ascending\xeb_ 2015 127 Adventure
## 4 Six Days Seven Nights\xeb_ 1998 98 Crime
## 5 Mighty Joe Young\xeb_ 1998 114 Crime
## 6 The Manchurian Candidate\xeb_ 2015 129 Crime
## studioname
## 1 Universum Film
## 2 Universum Film
## 3 True north
## 4 Sudha Productions
## 5 Sudha Productions
## 6 Sudha Productions
stars<-select(IMDB, name, zipcode)
dim(stars)
## [1] 149 2
names(stars)
## [1] "name" "zipcode"
head(stars)
## name zipcode
## 1 Christoph Waltz 13
## 2 Doug Walker 14
## 3 Channing Tatum 14
## 4 Harrison Ford 8
## 5 Charlize Theron 8
## 6 Tom Cruise 5
starsIn<-select(IMDB, title, year, name)
dim(starsIn)
## [1] 149 3
names(starsIn)
## [1] "title" "year" "name"
head(starsIn)
## title year name
## 1 Spectre\xeb_ 2015 Christoph Waltz
## 2 Star Wars: Episode VII - The Force Awakens\xeb_ NA Doug Walker
## 3 Jupiter Ascending\xeb_ 2015 Channing Tatum
## 4 Six Days Seven Nights\xeb_ 1998 Harrison Ford
## 5 Mighty Joe Young\xeb_ 1998 Charlize Theron
## 6 The Manchurian Candidate\xeb_ 2015 Tom Cruise
1)Find the title of movies Harrison Ford starred in. solution:
title_movie_ford<-sqldf("select title from starsIn where name='Harrison Ford'")
## Loading required package: tcltk
title_movie_ford
## title
## 1 Six Days Seven Nights\xeb_
## 2 Ender's Game\xeb_
2)Find the name and zipcode of stars who have starred in a movie made by Sudha production
su_prod<-sqldf("SELECT name, zipcode from (select name,title,year From movies join starsIn USING(title, year) where studioname='Sudha Productions') join stars USING(name)")
su_prod
## name zipcode
## 1 Harrison Ford 8
## 2 Harrison Ford 12
## 3 Charlize Theron 8
## 4 Tom Cruise 2
## 5 Tom Cruise 5
## 6 Tom Cruise 11
## 7 Tom Cruise 11
## 8 Tom Cruise 13
## 9 Tom Cruise 13
## 10 Tom Cruise 14
## 11 Tom Cruise 14
## 12 Tom Cruise 17
## 13 Tom Cruise 19
## 14 Tom Cruise 20
## 15 Tom Cruise 20
## 16 Robin Williams 7
## 17 Gary Oldman 9
## 18 Gary Oldman 13
## 19 Brad Pitt 5
## 20 Brad Pitt 5
## 21 Brad Pitt 19
3)Find the name of stars who have starred in both Disney movies and MGM movies.
s_pic<-sqldf("SELECT DISTINCT name
FROM movies
JOIN starsIn USING(title, year) where studioname='S Pictures'")
sudha_prod<-sqldf("SELECT DISTINCT name
FROM movies
JOIN starsIn USING(title, year) where studioname='Sudha Productions'")
name_stars_both_studionames<-intersect(s_pic, sudha_prod)
print(name_stars_both_studionames)
## name
## 1 Tom Cruise
6)list the number of movies each star has starred in. Display the result in ascending order of star’s names.
name_adress<-sqldf("select name, count(title) AS count
from starsIn
group by name
order by name asc")
head(name_adress)
## name count
## 1 A.J. Buckley 1
## 2 Abbie Cornish 1
## 3 Adam Sandler 1
## 4 Aidan Turner 1
## 5 Al Pacino 1
## 6 Alain Delon 1
tail(name_adress)
## name count
## 99 Tom Hanks 2
## 100 Tony Curran 1
## 101 Vincent Schiavelli 1
## 102 Vivica A. Fox 1
## 103 Will Smith 1
## 104 William Smith 1
8)Find the name of studio and the average length of movies it made in 2015 if the studio has made more than 1 movies in 2015.
studio_name<-sqldf("select studioname, avg(length) from movies where year=2015 group by studioName having count(*)>1")
head(studio_name)
## studioname avg(length)
## 1 Bombay Talkies 137.5
## 2 Eskay Movies 125.5
## 3 Pannonia Film 152.0
## 4 Sudha Productions 129.5
tail(studio_name)
## studioname avg(length)
## 1 Bombay Talkies 137.5
## 2 Eskay Movies 125.5
## 3 Pannonia Film 152.0
## 4 Sudha Productions 129.5