# OpenIntro Data
# Load arbuthnot data.
# source() runs whatever code the server returns, so fetch it over HTTPS
# rather than plain HTTP to keep the script from being altered in transit.
source("https://www.openintro.org/stat/data/arbuthnot.R")
# Load present data (defines the `present` data frame used below).
source("https://www.openintro.org/stat/data/present.R")
# Dimensions of the data set: number of rows, then number of columns
dim(present)
## [1] 63 3
# Column names available in `present`
names(present)
## [1] "year" "boys" "girls"
# Extremes of each variable, reported separately: the year column holds
# calendar years and the girls column holds counts, so summing them would
# mix units.
range(present$year)
## [1] 1940 2002
range(present$girls)
## [1] 1148715 2082052
# Line plot of the girls-to-boys ratio for each year
plot(
  present$year,
  present$girls / present$boys,
  type = "l",
  main = "Ratio of Girls to Boys over Time",
  xlab = "Year",
  ylab = "Ratio of Girls to Boys"
)

# Row index at which the girls column reaches its maximum
which.max(present[["girls"]])
## [1] 22
# Preview the first six rows
head(present, n = 6L)
## year boys girls
## 1 1940 1211684 1148715
## 2 1941 1289734 1223693
## 3 1942 1444365 1364631
## 4 1943 1508959 1427901
## 5 1944 1435301 1359499
## 6 1945 1404587 1330869
# Preview the last six rows
tail(present, n = 6L)
## year boys girls
## 58 1997 1985596 1895298
## 59 1998 2016205 1925348
## 60 1999 2026854 1932563
## 61 2000 2076969 1981845
## 62 2001 2057922 1968011
## 63 2002 2057979 1963747
# Six-number summary (min, quartiles, median, mean, max) of the year column
summary(present[["year"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1940 1956 1971 1971 1986 2002
# Arithmetic mean of the girls column
mean(present[["girls"]])
## [1] 1793915
# Median (middle value) of the girls column
median(present[["girls"]])
## [1] 1831679
# Frequency table of the girls column: each distinct value (sorted ascending)
# with the number of rows holding it. Every count below is 1, i.e. no value
# repeats across the 63 rows.
table(present[["girls"]])
##
## 1148715 1223693 1330869 1359499 1364631 1427901 1528639 1531063 1537844 1543352
## 1 1 1 1 1 1 1 1 1 1
## 1588484 1597452 1620716 1623885 1703131 1705238 1717571 1721216 1730594 1733060
## 1 1 1 1 1 1 1 1 1 1
## 1733177 1753634 1759642 1760412 1768966 1773380 1789651 1794861 1800064 1816008
## 1 1 1 1 1 1 1 1 1 1
## 1827830 1831679 1832578 1833304 1858241 1875724 1895298 1900322 1901014 1903234
## 1 1 1 1 1 1 1 1 1 1
## 1907086 1925348 1930178 1932563 1951379 1958294 1963747 1967328 1968011 1971468
## 1 1 1 1 1 1 1 1 1 1
## 1973576 1981845 1982917 1996388 2009389 2028717 2029502 2034896 2051266 2071158
## 1 1 1 1 1 1 1 1 1 1
## 2074824 2078142 2082052
## 1 1 1
# Single cell: first row of the year column
present[1, "year"]
## [1] 1940
# First twelve rows, all columns
head(present, n = 12L)
## year boys girls
## 1 1940 1211684 1148715
## 2 1941 1289734 1223693
## 3 1942 1444365 1364631
## 4 1943 1508959 1427901
## 5 1944 1435301 1359499
## 6 1945 1404587 1330869
## 7 1946 1691220 1597452
## 8 1947 1899876 1800064
## 9 1948 1813852 1721216
## 10 1949 1826352 1733177
## 11 1950 1823555 1730594
## 12 1951 1923020 1827830
# Value of the girls column in the tenth row
present[10, "girls"]
## [1] 1733177
# Element-wise test: is each year equal to 2016? The years shown run from
# 1940 to 2002, so every element is FALSE.
present[["year"]] == 2016
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE
# Element-wise test: is each year earlier than 2016? Every element is TRUE.
present[["year"]] < 2016
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE
# Sample standard deviation of the year column
sd(present[["year"]])
## [1] 18.3303