This will not be read by the program.

DataCamp 0

Second screen in Lab 0

# Read the births table from the DataCamp course assets into `present`.
present <- read.table(
  "http://s3.amazonaws.com/assets.datacamp.com/course/dasi/present.txt"
)


# This will print the first rows of your data set in the console:
head(present)
##   year    boys   girls
## 1 1940 1211684 1148715
## 2 1941 1289734 1223693
## 3 1942 1444365 1364631
## 4 1943 1508959 1427901
## 5 1944 1435301 1359499
## 6 1945 1404587 1330869

The present data frame is already loaded. Print the number of rows and variables with the 'dim' function:

# Show the data frame's dimensions: number of rows, then number of variables.
dim(present)
## [1] 63  3
# List the variable (column) names of the data frame:
colnames(present)
## [1] "year"  "boys"  "girls"

Note that no print statement is necessary.

Get the number of boys born each year:

# Store the yearly counts of boys in their own vector so the per-year values
# stay available under the name `boys`.
boys <- present$boys
# Number of entries in the vector (one per row of `present`):
length(boys)
## [1] 63
# Same for the yearly counts of girls.
girls <- present$girls
length(girls)
## [1] 63

# Scatter plot of the number of girls born (y axis) for each year (x axis).
# The general call form is plot(x, y).
plot(x = present$year, y = present$girls)

plot of chunk unnamed-chunk-3


# Draw the same series as a connected line. The plot type is given by its
# one-character code: "l" means lines.
plot(present$year, present$girls, type = "l")

plot of chunk unnamed-chunk-3

# Open the help page for read.table.
help("read.table")

Now get the total number of births each year by adding the boys and girls columns:

# Total births per year: add the boys and girls columns and store the result
# as a new column of `present`.
present[["babies"]] <- with(present, boys + girls)
head(present[["babies"]])
## [1] 2360399 2513427 2808996 2936860 2794800 2735456

# With the `babies` column as the denominator, compute the proportion of
# boys born in a given year, then plot it against the year.
present[["proportionBoys"]] <- with(present, boys / babies)
head(present[["proportionBoys"]])
## [1] 0.5133 0.5131 0.5142 0.5138 0.5136 0.5135
plot(x = present$year, y = present$proportionBoys)

plot of chunk unnamed-chunk-4


# In which years do boys outnumber girls? Store one logical flag per year.
present[["boysGreater"]] <- with(present, boys > girls)
head(present[["boysGreater"]])
## [1] TRUE TRUE TRUE TRUE TRUE TRUE

# Sanity check with head(): look at the first values of each variable to
# confirm they are in the expected order.
head(present[["babies"]])
## [1] 2360399 2513427 2808996 2936860 2794800 2735456
head(present[["year"]])
## [1] 1940 1941 1942 1943 1944 1945
# Boy-to-girl ratio for every year, plotted against the year.
present[["ratio"]] <- with(present, boys / girls)
plot(x = present$year, y = present$ratio)

plot of chunk unnamed-chunk-4

# Same ratio drawn as a connected line; "l" is the one-character code for a
# line plot.
plot(present$year, present$ratio, type = "l")

plot of chunk unnamed-chunk-4

Now find out in which year the difference between the number of boys and girls was greatest.


# Create the variable of the absolute difference for each year. abs() keeps
# the size of the gap regardless of which group is larger.
present$absoluteDif <- abs(present$boys - present$girls)

# Check that your code did what you expected it to do.
head(present$absoluteDif)
## [1] 62969 66041 79734 81058 75802 73718

# Use the command max() on the newly created variable to find the largest
# difference.
max(present$absoluteDif)
## [1] 105244

# max() returns the size of the difference, not the year. which.max() gives
# the position of that maximum, which is used to look up the matching year.
present$year[which.max(present$absoluteDif)]