# Sets the working directory to the folder that contains the document
# currently active in RStudio (for example your *.Rmd file), prints the new
# working directory, and invisibly returns it.
#
# Requires the rstudioapi package and a running RStudio session. The document
# must have been saved at least once; otherwise there is no folder to switch
# to and the function stops with an explanatory error instead of failing
# inside setwd().
set_wd <- function() {
  if (!requireNamespace("rstudioapi", quietly = TRUE)) {
    stop("set_wd() needs the 'rstudioapi' package to be installed.",
         call. = FALSE)
  }
  if (!rstudioapi::isAvailable()) {
    stop("set_wd() only works when run from inside RStudio.", call. = FALSE)
  }
  current_path <- rstudioapi::getActiveDocumentContext()$path
  # The path is empty when the active document has never been saved.
  if (is.null(current_path) || !nzchar(current_path)) {
    stop("Save the active document before calling set_wd().", call. = FALSE)
  }
  setwd(dirname(current_path))
  print(getwd())
}
#If you want to manually set your working directory, you can use the setwd command with your specific path as seen below
#setwd("Path")
library(rmarkdown)
library(latexpdf)
library(tinytex)
library(ggplot2)
library('plyr')
library(lattice)
#3.1 A)
##Homework 1
#A
# Exercise 3.1 (A): pie chart of the 2014 expenditure figures, one slice per
# federal program. `slices` and `lbls` are parallel vectors in the same order.
slices <- c(612, 852, 821, 253, 562, 532)
lbls <- c(
  "National Defense", "Social Security", "Medicare & Medicaid",
  "National Debt Interest", "Major Social-Aid Programs", "Other"
)
# Title spelling corrected ("Expendatures" -> "Expenditures").
pie(slices, labels = lbls, main = "Pie Chart of Expenditures in 2014")
# B)
##B)
# Expenditure table for part B: one row per federal program.
# The columns are named Federal.Program and expenditures.
df1 <- data.frame(
  Federal.Program = c(
    "National Defense",
    "Social Security",
    "Medicare & Medicaid",
    "National Debt Interest",
    "Major Social-Aid Programs",
    "Other"
  ),
  expenditures = c(612, 852, 821, 253, 562, 532)
)
df1
## Federal.Program expenditures
## 1 National Defense 612
## 2 Social Security 852
## 3 Medicare & Medicaid 821
## 4 National Debt Interest 253
## 5 Major Social-Aid Programs 562
## 6 Other 532
# Bar chart of expenditures by program, one coloured bar per program.
# The x-axis labels are rotated 90 degrees so the long program names fit.
ggplot(
  data = df1,
  mapping = aes(
    x = Federal.Program,
    y = expenditures,
    fill = Federal.Program
  )
) +
  geom_col() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )
# C)
###C
# Add each program's share of the total expenditures, in percent.
df1[["percentage"]] <- with(df1, 100 * (expenditures / sum(expenditures)))
df1
## Federal.Program expenditures percentage
## 1 National Defense 612 16.850220
## 2 Social Security 852 23.458150
## 3 Medicare & Medicaid 821 22.604626
## 4 National Debt Interest 253 6.965859
## 5 Major Social-Aid Programs 562 15.473568
## 6 Other 532 14.647577
# Bar chart of each program's percentage share of total expenditures.
# The x-axis labels are rotated 90 degrees so the long program names fit.
ggplot(
  data = df1,
  mapping = aes(
    x = Federal.Program,
    y = percentage,
    fill = Federal.Program
  )
) +
  geom_col() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )
# Pie chart of the percentage shares: a single stacked column drawn in polar
# coordinates, with every slice labelled at its midpoint.
# The labels are formatted to one decimal place with a percent sign; the raw
# column would otherwise print values such as 16.850220 on the slices.
ggplot(df1, aes(x = "", y = percentage, fill = Federal.Program)) +
  geom_col() +
  geom_text(
    aes(label = sprintf("%.1f%%", percentage)),
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar(theta = "y") +
  theme_void()
# D) I would say that the percentage pie chart is the best, if it is labeled
# like the first pie chart I made. I think it would be the best because it
# would have the percentages on their slices, and the labels would point
# directly at the slices they represent, instead of requiring a key to look
# back and forth between.
#3.4
# Working constants for exercise 3.4.
# Presumably: k = number of classes, n = number of readings, CI = class width,
# starting = lower bound of the first class -- confirm against the exercise.
# NOTE(review): `range` shares its name with the base function range(); the
# data's max - min is printed as 0.33 further down, not 0.3.
# NOTE(review): LengthInterval evaluates to n / range (about 83.33) and is not
# used anywhere in the visible code -- confirm it is the intended quantity.
range <- 0.3
k <- 7
n <- 25
CI <- 0.05
LengthInterval <- n / range
starting <- 0.705
# Fluoride readings for exercise 3.4, held in the single column `flouride`
# (the column name keeps its original spelling because later code uses it).
df2 <- data.frame(
  flouride = c(
    0.75, 0.86, 0.84, 0.85, 0.97, 0.94, 0.89, 0.84, 0.83, 0.89,
    0.88, 0.78, 0.77, 0.76, 0.82, 0.72, 0.92, 1.05, 0.94, 0.83,
    0.81, 0.85, 0.97, 0.93, 0.79
  )
)
# Readings in ascending order.
sort(df2[["flouride"]])
## [1] 0.72 0.75 0.76 0.77 0.78 0.79 0.81 0.82 0.83 0.83 0.84 0.84 0.85 0.85 0.86
## [16] 0.88 0.89 0.89 0.92 0.93 0.94 0.94 0.97 0.97 1.05
# Readings that fall strictly between 0.705 and 0.755.
with(df2, flouride[which(flouride > 0.705 & flouride < 0.755)])
## [1] 0.75 0.72
# Spread of the data divided by 7.
(max(df2[["flouride"]]) - min(df2[["flouride"]])) / 7
## [1] 0.04714286
# Relative-frequency histograms of the fluoride readings (exercise 3.4).
#
# Changes from the previous version:
#   * hist() has no `nint` or `equal.widths` arguments; passing them only
#     produced '"nint" is not a graphical parameter' style warnings and left
#     the bins unchanged. The seven classes of width 0.05 starting at 0.705
#     are now requested through `breaks`.
#   * hist() plots counts by default, which contradicted the
#     "Relative Frequency %" axis label. The counts are converted to
#     percentages before plotting.
#   * "Flouride" in the axis label is now spelled "Fluoride".

# Draws a histogram of `values` whose bar heights are percentages of the
# total number of observations. Extra arguments (e.g. `breaks`) are forwarded
# to hist().
relative_freq_hist <- function(values, ...) {
  h <- hist(values, ..., plot = FALSE)
  # Replace the counts with percentages so plot() draws relative frequencies.
  h$counts <- 100 * h$counts / sum(h$counts)
  plot(
    h,
    freq = TRUE,
    main = "Relative Frequency Histogram",
    xlab = "Fluoride Level",
    ylab = "Relative Frequency %"
  )
}

# Histogram with hist()'s default class boundaries.
relative_freq_hist(df2$flouride)

# Histogram with seven classes of width 0.05: 0.705, 0.755, ..., 1.055.
relative_freq_hist(df2$flouride, breaks = 0.705 + 0.05 * 0:7)

# Spread of the data (max - min).
max(df2$flouride) - min(df2$flouride)
## [1] 0.33
# Frequency table for the seven fluoride classes (exercise 3.4).
df3.4 <- data.frame(
  class = c(1, 2, 3, 4, 5, 6, 7),
  frequency = c(2, 4, 8, 4, 4, 2, 1)
)
df3.4
## class frequency
## 1 1 2
## 2 2 4
## 3 3 8
## 4 4 4
## 5 5 4
## 6 6 2
## 7 7 1
# Relative frequency = class count / total count. The total is taken from the
# table itself instead of being hard-coded as 25, so the column stays correct
# if the frequencies are ever edited.
df3.4$relative_frequency <- df3.4$frequency / sum(df3.4$frequency)
df3.4
## class frequency relative_frequency
## 1 1 2 0.08
## 2 2 4 0.16
## 3 3 8 0.32
## 4 4 4 0.16
## 5 5 4 0.16
## 6 6 2 0.08
## 7 7 1 0.04
## [1] 0.72 0.75 0.76 0.77 0.78 0.79 0.81 0.82 0.83 0.83 0.84 0.84 0.85 0.85 0.86 0.88 0.89 0.89 0.92 0.93 0.94 0.94 0.97 0.97 1.05
#3.7
# Exercise 3.7 data: 28 paired values, one column per therapy.
# The columns are named standard.therapy and new.therapy.
df3.7 <- data.frame(
  standard.therapy = c(
    4, 15, 24, 10, 1, 27, 31, 14, 2, 16, 32, 7, 13, 36,
    29, 6, 12, 18, 14, 15, 18, 6, 13, 21, 20, 8, 3, 24
  ),
  new.therapy = c(
    5, 20, 29, 15, 7, 32, 36, 17, 15, 19, 35, 10, 16, 39,
    27, 14, 10, 16, 12, 13, 16, 9, 18, 33, 30, 29, 31, 27
  )
)
df3.7
## standard.therapy new.therapy
## 1 4 5
## 2 15 20
## 3 24 29
## 4 10 15
## 5 1 7
## 6 27 32
## 7 31 36
## 8 14 17
## 9 2 15
## 10 16 19
## 11 32 35
## 12 7 10
## 13 13 16
## 14 36 39
## 15 29 27
## 16 6 14
## 17 12 10
## 18 18 16
## 19 14 12
## 20 15 13
## 21 18 16
## 22 6 9
## 23 13 18
## 24 21 33
## 25 20 30
## 26 8 29
## 27 3 31
## 28 24 27
# Histogram of the standard-therapy values (bin width 0.5).
ggplot(data = df3.7, mapping = aes(x = standard.therapy)) +
  geom_histogram(binwidth = 0.5) +
  theme_classic() +
  labs(x = "Survival time (standard)", y = "Frequency")

# Histogram of the new-therapy values, drawn the same way for comparison.
ggplot(data = df3.7, mapping = aes(x = new.therapy)) +
  geom_histogram(binwidth = 0.5) +
  theme_classic() +
  labs(x = "Survival time (new)", y = "Frequency")
# A) See the graphs above.
# B) The new treatment seems to have only one main peak, at a higher point,
# while the standard treatment seems to have around 4 peaks at the same
# maximum, all of which are shorter than the new treatment's peak. However,
# all of the new treatment's peaks were at longer survival times, which I
# believe would indicate a longer survival time overall.
#3.14
# Exercise 3.14 data, followed by its mean and median.
x3 <- c(
  155, 25, 30, 52, 142, 35, 51, 26, 2, 23,
  270, 74, 29, 29, 29, 29, 51, 83, 9, 69
)
# Arithmetic mean.
mean(x3)
## [1] 60.65
# Middle value of the sorted data.
median(x3)
## [1] 32.5
# Returns the mode(s) of `x3` together with how often they occur.
#
# The values are tabulated with table(); every entry whose count equals the
# highest count is returned, so ties yield several entries. The names of the
# result are the modal values and the elements are their counts.
mod <- function(x3) {
  counts <- table(x3)
  # Count attained by the first most-frequent value.
  top_count <- counts[which.max(counts)]
  # Keep every value that reaches that count.
  counts[which(counts == top_count)]
}
# Mode(s) of x3 with their counts; the recorded output below shows the value
# 29 occurring 4 times.
mod(x3)
## 29
## 4
# Mean = 60.65, median = 32.5, mode = 29 (appearing 4 times).
#3.29
# Exercise 3.29 data: 50 values in the single column treatment.times.
df3.29 <- data.frame(
  treatment.times = c(
    21, 20, 31, 24, 15, 21, 24, 18, 33, 8,
    26, 17, 27, 29, 24, 14, 29, 41, 15, 11,
    13, 28, 22, 16, 12, 15, 11, 16, 18, 17,
    29, 16, 24, 21, 19, 7, 16, 12, 45, 24,
    21, 12, 10, 13, 20, 35, 32, 22, 12, 10
  )
)
df3.29
## treatment.times
## 1 21
## 2 20
## 3 31
## 4 24
## 5 15
## 6 21
## 7 24
## 8 18
## 9 33
## 10 8
## 11 26
## 12 17
## 13 27
## 14 29
## 15 24
## 16 14
## 17 29
## 18 41
## 19 15
## 20 11
## 21 13
## 22 28
## 23 22
## 24 16
## 25 12
## 26 15
## 27 11
## 28 16
## 29 18
## 30 17
## 31 29
## 32 16
## 33 24
## 34 21
## 35 19
## 36 7
## 37 16
## 38 12
## 39 45
## 40 24
## 41 21
## 42 12
## 43 10
## 44 13
## 45 20
## 46 35
## 47 32
## 48 22
## 49 12
## 50 10
# Summary statistics for the treatment times of exercise 3.29.
# Mean and median.
mean(df3.29[["treatment.times"]])
## [1] 20.32
median(df3.29[["treatment.times"]])
## [1] 19.5
# Values in ascending order.
sort(df3.29[["treatment.times"]])
## [1] 7 8 10 10 11 11 12 12 12 12 13 13 14 15 15 15 16 16 16 16 17 17 18 18 19
## [26] 20 20 21 21 21 21 22 22 24 24 24 24 24 26 27 28 29 29 29 31 32 33 35 41 45
# Sample variance and standard deviation.
var(df3.29[["treatment.times"]])
## [1] 70.09959
sd(df3.29[["treatment.times"]])
## [1] 8.37255
# First quartile (25th percentile).
quantile(df3.29[["treatment.times"]], probs = 0.25)
## 25%
## 14.25
## p=c(.25, .9)
## quantile(`treatment times`, p)
#3.30
# Exercise 3.30 data: 70 counts in the single column number.of.trees.
df3.3 <- data.frame(
  number.of.trees = c(
    7, 8, 6, 4, 9, 11, 9, 9, 9, 10,
    9, 8, 11, 5, 8, 5, 8, 8, 7, 8,
    3, 5, 8, 7, 10, 7, 8, 9, 8, 11,
    10, 8, 9, 8, 9, 9, 7, 8, 13, 8,
    9, 6, 7, 9, 9, 7, 9, 5, 6, 5,
    6, 9, 8, 8, 4, 4, 7, 7, 8, 9,
    10, 2, 7, 10, 8, 10, 6, 7, 7, 8
  )
)
df3.3
## number.of.trees
## 1 7
## 2 8
## 3 6
## 4 4
## 5 9
## 6 11
## 7 9
## 8 9
## 9 9
## 10 10
## 11 9
## 12 8
## 13 11
## 14 5
## 15 8
## 16 5
## 17 8
## 18 8
## 19 7
## 20 8
## 21 3
## 22 5
## 23 8
## 24 7
## 25 10
## 26 7
## 27 8
## 28 9
## 29 8
## 30 11
## 31 10
## 32 8
## 33 9
## 34 8
## 35 9
## 36 9
## 37 7
## 38 8
## 39 13
## 40 8
## 41 9
## 42 6
## 43 7
## 44 9
## 45 9
## 46 7
## 47 9
## 48 5
## 49 6
## 50 5
## 51 6
## 52 9
## 53 8
## 54 8
## 55 4
## 56 4
## 57 7
## 58 7
## 59 8
## 60 9
## 61 10
## 62 2
## 63 7
## 64 10
## 65 8
## 66 10
## 67 6
## 68 7
## 69 7
## 70 8
# Histogram of the tree counts with one bin per integer value.
# The y-axis label spelling is corrected ("Frquency" -> "Frequency").
ggplot(df3.3, aes(x = number.of.trees)) +
  geom_histogram(binwidth = 1) +
  theme_classic() +
  xlab("number of trees") +
  ylab("Frequency")
#B
# Part B: mean, standard deviation and selected quantiles of the tree counts.
# NOTE(review): 0.68 / 0.95 / 0.997 look like the Empirical Rule percentages.
# If the exercise asks for the share of observations within 1, 2 and 3
# standard deviations of the mean, quantile() does not compute that --
# confirm the intent.
mean(df3.3[["number.of.trees"]])
## [1] 7.728571
sd(df3.3[["number.of.trees"]])
## [1] 1.984881
quantile(df3.3[["number.of.trees"]], probs = 0.68)
## 68%
## 9
p <- c(0.68, 0.95, 0.997)
quantile(df3.3[["number.of.trees"]], probs = p)
## 68% 95% 99.7%
## 9.000 10.550 12.586
# Exercise 3.4: tally the fluoride readings into the seven classes of width
# 0.05 that start at 0.705.
#
# The previous version redefined `x` and repeated the same three filter
# expressions once per class with hand-typed bounds. cut() now assigns every
# reading to its class in one pass. No reading lies exactly on a class
# boundary, so strict and non-strict comparisons give the same counts.
x <- c(
  0.75, 0.86, 0.84, 0.85, 0.97, 0.94, 0.89, 0.84, 0.83, 0.89,
  0.88, 0.78, 0.77, 0.76, 0.82, 0.72, 0.92, 1.05, 0.94, 0.83,
  0.81, 0.85, 0.97, 0.93, 0.79
)

# Class boundaries: 0.705, 0.755, ..., 1.055.
class_breaks <- 0.705 + 0.05 * 0:7

# dig.lab = 4 keeps the 1.005 and 1.055 bounds unrounded in the class labels.
x_class <- cut(x, breaks = class_breaks, dig.lab = 4)

# Readings that fall in each class, in their original order.
split(x, x_class)

# Number of readings per class; expected counts: 2, 4, 8, 4, 4, 2, 1.
class_frequency <- table(x_class)
class_frequency