Descriptive Statistics using R

Problem #1

#1. Consider the following 20 samples drawn from a population:

24.7 23.5 14.7 13.4 24.2 25.4 12.1 15.0 19.6 22.9 21.2 18.4 25.0 14.5 14.2 21.8 21.2 15.7 22.6 21.2
# The 20 sample observations for Problem 1, in the order given above.
data <- c(
  24.7, 23.5, 14.7, 13.4, 24.2,
  25.4, 12.1, 15.0, 19.6, 22.9,
  21.2, 18.4, 25.0, 14.5, 14.2,
  21.8, 21.2, 15.7, 22.6, 21.2
)

a)Calculate the sample mean

# Arithmetic mean of the sample; stored, then echoed.
mean_value <- mean(x = data)
print(mean_value)
## [1] 19.565

b)Calculate the median

# Middle value of the sorted sample; stored, then echoed.
median_value <- median(x = data)
print(median_value)
## [1] 21.2
c)Calculate the sample variance
# Sample variance (var() uses the n - 1 denominator); stored, then echoed.
var_value <- var(x = data)
print(var_value)
## [1] 19.43397

d)Calculate the sample standard deviation

# Sample standard deviation (square root of the sample variance).
sd_value <- sd(x = data)
print(sd_value)
## [1] 4.408398
e)Calculate the quartiles
# First, second and third quartiles (25%, 50%, 75%) using quantile()'s
# default interpolation method.
quartile_probs <- c(1, 2, 3) / 4
quartiles <- quantile(data, probs = quartile_probs)
print(quartiles)
##    25%    50%    75% 
## 14.925 21.200 23.050
f)Draw a histogram with 4 bins (change both the fill and border color)
# Histogram of the sample with custom fill and border colours.
# Note: a single number passed as `breaks` is treated by hist() as a
# suggestion for the number of cells, not a guarantee.
hist(
  data,
  breaks = 4,
  main = "Histogram",
  col = "lightblue",
  border = "darkblue"
)

g)Draw a boxplot
# Box-and-whisker plot of the 20 observations.
boxplot(x = data, main = "Boxplot of 20 samples")

Problem #2

1.Consider a random sample of n=50 resistors from suppliers A and B contained in the datafile

https://raw.githubusercontent.com/tmatis12/datafiles/main/resistors.csv

# Load the resistor measurements straight from the course repository and
# echo the full table.
data2 <- read.csv(
  file = "https://raw.githubusercontent.com/tmatis12/datafiles/main/resistors.csv"
)
print(data2)
##    SupplierA SupplierB
## 1   29.36279  30.62987
## 2   29.51027  29.13114
## 3   29.92811  29.81491
## 4   29.78606  29.46583
## 5   29.15107  29.59829
## 6   30.15748  29.12295
## 7   30.46553  30.13564
## 8   29.85992  29.23834
## 9   29.51514  28.69545
## 10  31.16777  29.46590
## 11  31.44440  29.84616
## 12  30.13799  29.27476
## 13  30.22418  29.89007
## 14  30.40154  30.11324
## 15  28.72853  29.17337
## 16  29.69430  29.87823
## 17  29.57491  30.07623
## 18  29.55289  29.75243
## 19  28.70514  29.29905
## 20  30.59459  29.19695
## 21  29.93642  29.95769
## 22  30.19725  29.50674
## 23  30.61088  29.85518
## 24  29.31532  29.00272
## 25  30.24217  29.21925
## 26  30.33630  29.33216
## 27  30.12932  30.27084
## 28  29.51702  28.85486
## 29  30.55834  29.67763
## 30  30.22333  30.02513
## 31  29.69312  29.58264
## 32  28.95343  30.35831
## 33  29.42375  28.79259
## 34  29.78290  29.59960
## 35  30.29367  28.66135
## 36  29.89087  29.81375
## 37  29.89336  29.23564
## 38  29.65950  30.17648
## 39  30.89342  29.82198
## 40  29.89125  29.44741
## 41  30.42494  29.21079
## 42  30.99763  29.30069
## 43  29.87300  29.26611
## 44  29.17079  29.83650
## 45  30.61960  30.05269
## 46  29.86612  29.66683
## 47  29.50220  28.98090
## 48  30.04817  30.31552
## 49  29.16770  29.58166
## 50  28.91839  29.96312

a)Calculate a summary (min, max, quartiles, mean, median) of the descriptive statistics for Supplier A (using the summary() command)

# Five-number summary plus mean for the Supplier A column.
summary(data2[["SupplierA"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   28.71   29.52   29.89   29.92   30.28   31.44

b)Calculate a histogram of the sample from supplier A (create a unique title and axes label).  Does the data "look" Normally distributed?

# Histogram of the Supplier A sample with a custom title, axis label
# and colours.
hist(
  data2[["SupplierA"]],
  main = "Histogram of Resistors (Supplier A)",
  xlab = "Resistance Value",
  col = "green",
  border = "yellow"
)

c)Calculate side by side box plots

# Side-by-side box plots, one per supplier, labelled via `names`.
boxplot(
  data2[["SupplierA"]],
  data2[["SupplierB"]],
  main = "Box Plot of Resistors by Supplier",
  names = c("Supplier A", "Supplier B"),
  xlab = "Supplier",
  ylab = "Resistance Value"
)

Problem #3

1.Consider a sample of body temperature and heartbeat for n=65 males (1) and females (2) contained in the datafile

https://raw.githubusercontent.com/tmatis12/datafiles/main/normtemp.csv

# Download the temperature/heartbeat data once and reuse it. The original
# fetched the same file twice and stored the first copy in `data`,
# overwriting the Problem 1 sample without ever using it.
url <- "https://raw.githubusercontent.com/tmatis12/datafiles/main/normtemp.csv"
df3 <- read.csv(url)

# Rows coded Sex == 1 are the male subjects (per the problem statement).
males <- df3[df3$Sex == 1, ]
males
##    Temp Sex Beats
## 1  96.3   1    70
## 2  96.7   1    71
## 3  96.9   1    74
## 4  97.0   1    80
## 5  97.1   1    73
## 6  97.1   1    75
## 7  97.1   1    82
## 8  97.2   1    64
## 9  97.3   1    69
## 10 97.4   1    70
## 11 97.4   1    68
## 12 97.4   1    72
## 13 97.4   1    78
## 14 97.5   1    70
## 15 97.5   1    75
## 16 97.6   1    74
## 17 97.6   1    69
## 18 97.6   1    73
## 19 97.7   1    77
## 20 97.8   1    58
## 21 97.8   1    73
## 22 97.8   1    65
## 23 97.8   1    74
## 24 97.9   1    76
## 25 97.9   1    72
## 26 98.0   1    78
## 27 98.0   1    71
## 28 98.0   1    74
## 29 98.0   1    67
## 30 98.0   1    64
## 31 98.0   1    78
## 32 98.1   1    73
## 33 98.1   1    67
## 34 98.2   1    66
## 35 98.2   1    64
## 36 98.2   1    71
## 37 98.2   1    72
## 38 98.3   1    86
## 39 98.3   1    72
## 40 98.4   1    68
## 41 98.4   1    70
## 42 98.4   1    82
## 43 98.4   1    84
## 44 98.5   1    68
## 45 98.5   1    71
## 46 98.6   1    77
## 47 98.6   1    78
## 48 98.6   1    83
## 49 98.6   1    66
## 50 98.6   1    70
## 51 98.6   1    82
## 52 98.7   1    73
## 53 98.7   1    78
## 54 98.8   1    78
## 55 98.8   1    81
## 56 98.8   1    78
## 57 98.9   1    80
## 58 99.0   1    75
## 59 99.0   1    79
## 60 99.0   1    81
## 61 99.1   1    71
## 62 99.2   1    83
## 63 99.3   1    63
## 64 99.4   1    70
## 65 99.5   1    75
# Rows coded Sex == 2 are the female subjects (per the problem statement).
is_female <- df3[["Sex"]] == 2
females <- df3[is_female, ]
print(females)
##      Temp Sex Beats
## 66   96.4   2    69
## 67   96.7   2    62
## 68   96.8   2    75
## 69   97.2   2    66
## 70   97.2   2    68
## 71   97.4   2    57
## 72   97.6   2    61
## 73   97.7   2    84
## 74   97.7   2    61
## 75   97.8   2    77
## 76   97.8   2    62
## 77   97.8   2    71
## 78   97.9   2    68
## 79   97.9   2    69
## 80   97.9   2    79
## 81   98.0   2    76
## 82   98.0   2    87
## 83   98.0   2    78
## 84   98.0   2    73
## 85   98.0   2    89
## 86   98.1   2    81
## 87   98.2   2    73
## 88   98.2   2    64
## 89   98.2   2    65
## 90   98.2   2    73
## 91   98.2   2    69
## 92   98.2   2    57
## 93   98.3   2    79
## 94   98.3   2    78
## 95   98.3   2    80
## 96   98.4   2    79
## 97   98.4   2    81
## 98   98.4   2    73
## 99   98.4   2    74
## 100  98.4   2    84
## 101  98.5   2    83
## 102  98.6   2    82
## 103  98.6   2    85
## 104  98.6   2    86
## 105  98.6   2    77
## 106  98.7   2    72
## 107  98.7   2    79
## 108  98.7   2    59
## 109  98.7   2    64
## 110  98.7   2    65
## 111  98.7   2    82
## 112  98.8   2    64
## 113  98.8   2    70
## 114  98.8   2    83
## 115  98.8   2    89
## 116  98.8   2    69
## 117  98.8   2    73
## 118  98.8   2    84
## 119  98.9   2    76
## 120  99.0   2    79
## 121  99.0   2    81
## 122  99.1   2    80
## 123  99.1   2    74
## 124  99.2   2    77
## 125  99.2   2    66
## 126  99.3   2    68
## 127  99.4   2    77
## 128  99.9   2    79
## 129 100.0   2    78
## 130 100.8   2    77

a)Calculate a summary of the descriptive statistics of heartbeat and body temperature for both males and females.

# summary() describes only its first argument; a second vector passed
# positionally is swallowed by `...` and ignored, so the original calls
# never summarised heartbeat. Each variable now gets its own call.
# (Re-knit the document to render the heartbeat summaries.)
summary(males$Temp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    96.3    97.6    98.1    98.1    98.6    99.5
summary(males$Beats)
summary(females$Temp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   96.40   98.00   98.40   98.39   98.80  100.80
summary(females$Beats)

b)Calculate a histogram of heartbeat for females (create a unique title and axes label, change the color to pink).

# Histogram of female heartbeat. The column is named `Beats`; the original
# `females$Beat` only worked through `$` partial matching on data frames,
# which breaks on tibbles or if another column starting with "Beat" appears.
hist(
  females$Beats,
  main = "Histogram of Heartbeat (Females)",
  xlab = "Heartbeat",
  col = "pink"
)

c)Calculate a histogram of heartbeat for males (create a unique title and axes label, change the color to blue).

# Histogram of male heartbeat. The column is named `Beats`; the original
# `males$Beat` only worked through `$` partial matching on data frames,
# which breaks on tibbles or if another column starting with "Beat" appears.
hist(
  males$Beats,
  main = "Histogram of Heartbeat (Males)",
  xlab = "Heartbeat",
  col = "blue"
)

d)Across both males and females, what is the sample correlation coefficient between heartbeat and body temperature? 

# Pearson correlation between heartbeat and body temperature across all
# subjects. Uses the full column name `Beats` instead of relying on `$`
# partial matching of `Beat`.
cor(df3$Beats, df3$Temp)
## [1] 0.2536564