Nathan Stewart 811847789

# The function below looks up the folder that contains your *.Rmd file and sets that folder as your working directory; call set_wd() to apply it.

# Set the working directory to the folder of the document currently active in
# RStudio, then print the new working directory.
#
# Takes no arguments. Stops with an explanatory message when the rstudioapi
# package is unavailable or when the active document has no path on disk
# (an empty path would otherwise reach setwd() and fail there).
# Returns the new working directory invisibly (the value of print()).
set_wd <- function() {
  # Use the namespace directly instead of library(), so calling this function
  # does not attach a package to the caller's search path.
  if (!requireNamespace("rstudioapi", quietly = TRUE)) {
    stop("Package 'rstudioapi' is required by set_wd().", call. = FALSE)
  }
  current_path <- rstudioapi::getActiveDocumentContext()$path
  if (length(current_path) != 1L || !nzchar(current_path)) {
    stop(
      "The active document has no file path; save it before calling set_wd().",
      call. = FALSE
    )
  }
  setwd(dirname(current_path))
  print(getwd())
}

#If you want to manually set your working directory, you can use the setwd command with your specific path as seen below
#setwd("Path")

library(rmarkdown)
library(latexpdf)
library(tinytex)
library(ggplot2)
library('plyr')
library(lattice)

Homework Starts here

#3.1 A)

##Homework 1 
#A
# Expenditure values and their program labels, listed in the same order.
slices <- c(612, 852, 821, 253, 562, 532)
lbls <- c(
  "National Defense",
  "Social Security",
  "Medicare & Medicaid",
  "National Debt Interest",
  "Major Social-Aid Programs",
  "Other"
)
# Base-graphics pie chart with one labelled slice per program.
pie(slices, labels = lbls, main = "Pie Chart of Expenditures in 2014")

##B)
# One row per federal program with its expenditure value.
df1 <- data.frame(
  Federal.Program = c(
    "National Defense",
    "Social Security",
    "Medicare & Medicaid",
    "National Debt Interest",
    "Major Social-Aid Programs",
    "Other"
  ),
  expenditures = c(612, 852, 821, 253, 562, 532)
)
df1

##             Federal.Program expenditures
## 1          National Defense          612
## 2           Social Security          852
## 3       Medicare & Medicaid          821
## 4    National Debt Interest          253
## 5 Major Social-Aid Programs          562
## 6                     Other          532

# Bar chart of expenditures, one coloured bar per program; the x-axis labels
# are rotated 90 degrees.
ggplot(
  data = df1,
  mapping = aes(x = Federal.Program, y = expenditures, fill = Federal.Program)
) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

###C
# Each program's share of the total expenditures, expressed in percent.
df1[["percentage"]] <- 100 * prop.table(df1$expenditures)
df1

##             Federal.Program expenditures percentage
## 1          National Defense          612  16.850220
## 2           Social Security          852  23.458150
## 3       Medicare & Medicaid          821  22.604626
## 4    National Debt Interest          253   6.965859
## 5 Major Social-Aid Programs          562  15.473568
## 6                     Other          532  14.647577

# Bar chart of each program's percentage share; the x-axis labels are rotated
# 90 degrees.
ggplot(
  data = df1,
  mapping = aes(x = Federal.Program, y = percentage, fill = Federal.Program)
) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Pie chart of the percentage shares: a single stacked column drawn in polar
# coordinates, with each share printed in the middle of its slice.
ggplot(df1, aes(x = "", y = percentage, fill = Federal.Program)) +
  geom_col() +
  geom_text(
    # Format to one decimal place; the raw column would print every digit of
    # the double and overflow the slices.
    aes(label = sprintf("%.1f%%", percentage)),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar(theta = "y") +
  theme_void()

D) I would say that the percentage pie chart is the best, if labeled like the first pie chart I made. I think it would be the best because it would have the percentages on their slice, and the formatted labels are directly pointing at the slice they represent, instead of having a key to look back and forth between.

#3.4

# Working values for building the class intervals by hand.
# NOTE(review): `range` shares its name with base::range(); the name is kept
# so anything else that reads it keeps working.
range <- 0.3     # approximate spread of the readings
k <- 7           # number of classes
n <- 25          # number of readings
CI <- .05        # class interval width actually used
# Approximate class width is the range divided by the number of classes
# (it was n / range, which is not a width).
LengthInterval <- range / k
starting <- .705 # lower boundary of the first class

# The 25 readings, stored as a single-column data frame.
df2 <- data.frame(
  flouride = c(
    0.75, 0.86, 0.84, 0.85, 0.97, 0.94, 0.89, 0.84, 0.83, 0.89,
    0.88, 0.78, 0.77, 0.76, 0.82, 0.72, 0.92, 1.05, 0.94, 0.83,
    0.81, 0.85, 0.97, 0.93, 0.79
  )
)

# Readings in ascending order.
sort(df2$flouride)

##  [1] 0.72 0.75 0.76 0.77 0.78 0.79 0.81 0.82 0.83 0.83 0.84 0.84 0.85 0.85 0.86
## [16] 0.88 0.89 0.89 0.92 0.93 0.94 0.94 0.97 0.97 1.05

# Readings that fall strictly between 0.705 and 0.755.
with(df2, flouride[which(flouride > 0.705 & flouride < 0.755)])

## [1] 0.75 0.72

# Spread of the readings divided across 7 classes.
diff(range(df2$flouride)) / 7

## [1] 0.04714286

# Relative frequency histogram of the readings using hist()'s default classes.
# hist() draws raw counts by default, so the counts are converted to
# percentages before plotting to match the "Relative Frequency %" axis label.
# local() keeps the temporary histogram object out of the global environment.
local({
  h <- hist(df2$flouride, plot = FALSE)
  h$counts <- 100 * h$counts / sum(h$counts)
  plot(h,
       main = "Relative Frequency Histogram",
       xlab = "Fluoride Level",
       ylab = "Relative Frequency %")
})

# Relative frequency histogram with 7 classes of width 0.05 starting at 0.705.
# hist() has no `nint` or `equal.widths` arguments (passing them only produced
# "is not a graphical parameter" warnings and left the default classes in
# place); the classes are set through `breaks` instead.
# local() keeps the temporary objects out of the global environment.
local({
  class_breaks <- seq(0.705, by = 0.05, length.out = 8)
  h <- hist(df2$flouride, breaks = class_breaks, plot = FALSE)
  # Convert counts to percentages so the bars match the y-axis label.
  h$counts <- 100 * h$counts / sum(h$counts)
  plot(h,
       main = "Relative Frequency Histogram",
       xlab = "Fluoride Level",
       ylab = "Relative Frequency %")
})

# Spread of the readings: largest value minus smallest value.
diff(range(df2$flouride))

## [1] 0.33

# Frequency table for the 7 classes.
df3.4 <- data.frame(
  class = as.numeric(1:7),
  frequency = c(2, 4, 8, 4, 4, 2, 1)
)
df3.4

##   class frequency
## 1     1         2
## 2     2         4
## 3     3         8
## 4     4         4
## 5     5         4
## 6     6         2
## 7     7         1

# Share of the 25 readings that falls in each class.
df3.4[["relative_frequency"]] <- prop.table(df3.4$frequency)
df3.4

##   class frequency relative_frequency
## 1     1         2               0.08
## 2     2         4               0.16
## 3     3         8               0.32
## 4     4         4               0.16
## 5     5         4               0.16
## 6     6         2               0.08
## 7     7         1               0.04

[1] 0.72 0.75 0.76 0.77 0.78 0.79 0.81 0.82 0.83 0.83 0.84 0.84 0.85 0.85 0.86 0.88 0.89 0.89 0.92 0.93 0.94 0.94 0.97 0.97 1.05

range is 0.3
see graph
see graph
About 24%, so about 88 days would have a fluoride reading above 0.90 ppm (365 * 0.24).

#3.7

# Paired columns of values for the standard and the new therapy (28 rows).
df3.7 <- data.frame(
  standard.therapy = c(
    4, 15, 24, 10, 1, 27, 31, 14, 2, 16, 32, 7, 13, 36,
    29, 6, 12, 18, 14, 15, 18, 6, 13, 21, 20, 8, 3, 24
  ),
  new.therapy = c(
    5, 20, 29, 15, 7, 32, 36, 17, 15, 19, 35, 10, 16, 39,
    27, 14, 10, 16, 12, 13, 16, 9, 18, 33, 30, 29, 31, 27
  )
)
df3.7

##    standard.therapy new.therapy
## 1                 4           5
## 2                15          20
## 3                24          29
## 4                10          15
## 5                 1           7
## 6                27          32
## 7                31          36
## 8                14          17
## 9                 2          15
## 10               16          19
## 11               32          35
## 12                7          10
## 13               13          16
## 14               36          39
## 15               29          27
## 16                6          14
## 17               12          10
## 18               18          16
## 19               14          12
## 20               15          13
## 21               18          16
## 22                6           9
## 23               13          18
## 24               21          33
## 25               20          30
## 26                8          29
## 27                3          31
## 28               24          27

# Histogram of the standard-therapy column, bins 0.5 wide.
ggplot(data = df3.7, mapping = aes(x = standard.therapy)) +
  geom_histogram(binwidth = 0.5) +
  labs(x = "Survival time (standard)", y = "Frequency") +
  theme_classic()

# Histogram of the new-therapy column, drawn the same way for comparison.
ggplot(data = df3.7, mapping = aes(x = new.therapy)) +
  geom_histogram(binwidth = 0.5) +
  labs(x = "Survival time (new)", y = "Frequency") +
  theme_classic()

A) See graphs.
B) The new treatment seems to have only one main peak, at a higher point, while the standard treatment seems to have around four peaks at the same maximum, all of which are shorter than the new treatment’s peak. However, all of the new treatment’s peaks were at longer survival times, which I believe would indicate a longer survival time overall.

#3.14

# The 20 observations for this exercise.
x3 <- c(
  155, 25, 30, 52, 142, 35, 51, 26, 2, 23,
  270, 74, 29, 29, 29, 29, 51, 83, 9, 69
)

# Arithmetic mean.
mean(x3)

## [1] 60.65

# Middle value of the sorted observations.
median(x3)

## [1] 32.5

# Most frequent value(s) of x3 together with how often they occur.
# Returns the entries of table(x3) whose count equals the largest count, so
# tied values are all returned.
mod <- function(x3) {
  counts <- table(x3)
  top <- counts[which.max(counts)]
  counts[which(counts == top)]
}
mod(x3)

## 29 
##  4

Mean= 60.65 median= 32.5 Mode= 29, appearing 4 times

#3.29

# The 50 treatment times, stored as a single-column data frame.
df3.29 <- data.frame(
  treatment.times = c(
    21, 20, 31, 24, 15, 21, 24, 18, 33, 8,
    26, 17, 27, 29, 24, 14, 29, 41, 15, 11,
    13, 28, 22, 16, 12, 15, 11, 16, 18, 17,
    29, 16, 24, 21, 19, 7, 16, 12, 45, 24,
    21, 12, 10, 13, 20, 35, 32, 22, 12, 10
  )
)
df3.29

##    treatment.times
## 1               21
## 2               20
## 3               31
## 4               24
## 5               15
## 6               21
## 7               24
## 8               18
## 9               33
## 10               8
## 11              26
## 12              17
## 13              27
## 14              29
## 15              24
## 16              14
## 17              29
## 18              41
## 19              15
## 20              11
## 21              13
## 22              28
## 23              22
## 24              16
## 25              12
## 26              15
## 27              11
## 28              16
## 29              18
## 30              17
## 31              29
## 32              16
## 33              24
## 34              21
## 35              19
## 36               7
## 37              16
## 38              12
## 39              45
## 40              24
## 41              21
## 42              12
## 43              10
## 44              13
## 45              20
## 46              35
## 47              32
## 48              22
## 49              12
## 50              10

# Centre of the treatment times.
with(df3.29, mean(treatment.times))

## [1] 20.32

with(df3.29, median(treatment.times))

## [1] 19.5

# Treatment times in ascending order.
with(df3.29, sort(treatment.times))

##  [1]  7  8 10 10 11 11 12 12 12 12 13 13 14 15 15 15 16 16 16 16 17 17 18 18 19
## [26] 20 20 21 21 21 21 22 22 24 24 24 24 24 26 27 28 29 29 29 31 32 33 35 41 45

# Spread of the treatment times: sample variance and standard deviation.
with(df3.29, var(treatment.times))

## [1] 70.09959

with(df3.29, sd(treatment.times))

## [1] 8.37255

# 25th percentile.
quantile(df3.29$treatment.times, probs = 0.25)

##   25% 
## 14.25

## p=c(.25, .9)
## quantile(`treatment times`, p)

The 25th percentile is 14.25; this means that 25% of patients have a treatment time of 14.25 minutes or less.
The data support the claim that 90% of the patients have a treatment time of 40 minutes or less. You can see this easily, as only 2 out of 50 data points were above 40 minutes. This means that 48/50 patients had a time under 40 minutes, which is 96%.

#3.30

# The 70 tree counts, stored as a single-column data frame.
df3.3 <- data.frame(
  number.of.trees = c(
    7, 8, 6, 4, 9, 11, 9, 9, 9, 10,
    9, 8, 11, 5, 8, 5, 8, 8, 7, 8,
    3, 5, 8, 7, 10, 7, 8, 9, 8, 11,
    10, 8, 9, 8, 9, 9, 7, 8, 13, 8,
    9, 6, 7, 9, 9, 7, 9, 5, 6, 5,
    6, 9, 8, 8, 4, 4, 7, 7, 8, 9,
    10, 2, 7, 10, 8, 10, 6, 7, 7, 8
  )
)
df3.3

##    number.of.trees
## 1                7
## 2                8
## 3                6
## 4                4
## 5                9
## 6               11
## 7                9
## 8                9
## 9                9
## 10              10
## 11               9
## 12               8
## 13              11
## 14               5
## 15               8
## 16               5
## 17               8
## 18               8
## 19               7
## 20               8
## 21               3
## 22               5
## 23               8
## 24               7
## 25              10
## 26               7
## 27               8
## 28               9
## 29               8
## 30              11
## 31              10
## 32               8
## 33               9
## 34               8
## 35               9
## 36               9
## 37               7
## 38               8
## 39              13
## 40               8
## 41               9
## 42               6
## 43               7
## 44               9
## 45               9
## 46               7
## 47               9
## 48               5
## 49               6
## 50               5
## 51               6
## 52               9
## 53               8
## 54               8
## 55               4
## 56               4
## 57               7
## 58               7
## 59               8
## 60               9
## 61              10
## 62               2
## 63               7
## 64              10
## 65               8
## 66              10
## 67               6
## 68               7
## 69               7
## 70               8

# Histogram of the tree counts with one bin per unit.
# The y-axis label previously read "Frquency".
ggplot(df3.3, aes(x = number.of.trees)) +
  geom_histogram(binwidth = 1) +
  theme_classic() +
  xlab("number of trees") +
  ylab("Frequency")

#B
# Sample mean of the tree counts.
with(df3.3, mean(number.of.trees))

## [1] 7.728571

# Sample standard deviation of the tree counts.
with(df3.3, sd(number.of.trees))

## [1] 1.984881

# 68th percentile on its own.
quantile(df3.3$number.of.trees, probs = 0.68)

## 68% 
##   9

# 68th, 95th and 99.7th percentiles together.
p <- c(0.68, 0.95, 0.997)
quantile(df3.3$number.of.trees, probs = p)

##    68%    95%  99.7% 
##  9.000 10.550 12.586

see graph
mean is 7.728571
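s = 1.984881. Upper limit of (y ± s) = 9.713452, actual 68th percentile = 9.000; upper limit of (y ± 2s) = 11.698333, actual 95th percentile = 10.550; upper limit of (y ± 3s) = 13.683214, actual 99.7th percentile = 12.586. The calculated intervals are close, but not exact representations of the data.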

3.4 calculations

# Tally the 25 readings into the 7 classes of width 0.05 that start at 0.705.
# The vector is defined once and binned with cut(), instead of repeating one
# hand-written filter per class.
x <- c(
  0.75, 0.86, 0.84, 0.85, 0.97, 0.94, 0.89, 0.84, 0.83, 0.89,
  0.88, 0.78, 0.77, 0.76, 0.82, 0.72, 0.92, 1.05, 0.94, 0.83,
  0.81, 0.85, 0.97, 0.93, 0.79
)

# Class boundaries: 0.705, 0.755, ..., 1.055 (8 boundaries give 7 classes).
class_breaks <- seq(0.705, by = 0.05, length.out = 8)

# dig.lab = 4 keeps the boundary labels at full precision (e.g. "1.005").
classes <- cut(x, breaks = class_breaks, dig.lab = 4)

# No reading lies exactly on a boundary, so closing the intervals on the left
# or on the right must give the same tally.
stopifnot(identical(
  as.vector(table(classes)),
  as.vector(table(cut(x, breaks = class_breaks, right = FALSE, dig.lab = 4)))
))

# Readings that fall in each class.
split(x, classes)

# Number of readings in each class (2, 4, 8, 4, 4, 2, 1 from first to last).
table(classes)

Homework 1 Stat 703

Nathan Stewart 811847789

Homework Starts here

3.4 calculations