Introduction to Statistics: R Visualization

Rasim Muzaffer Musal

Assumptions and Goals

  • Assuming you know how to read text files into R
  • Assuming you understand the row-column structure of a data frame and know how to use one
  • At the end of the lecture you should be able to create barplots, histograms, scatterplots and boxplots.

Read in the data

# Load the COVID dataset from a tab-delimited text file whose first line
# holds the column names.
data <- read.table(
  file = 'C:/Users/rm84/Desktop/teaching/datasets/CovidData.txt',
  sep = '\t',
  header = TRUE
)

# Preview the first six rows (the default for head()).
head(data, n = 6L)
  County_ID Poverty  Income     Density CovMort1 CovMort2 CovMort3 CovMort4
1         1   0.089 1.07589 8.826548080       39       43       59       40
2         2   0.172 0.58112 0.005841156        0        0        0        0
3         3   0.098 0.62640 0.250206859       10        5        1        0
4         4   0.161 0.58394 0.513789267        9       11       11        8
5         5   0.121 0.68248 0.167644688        1        1        8        4
6         6   0.120 0.59048 0.075806972        2        1        0        0
  CovMort5 CovMort6 CovMort7 CovMort8 CovMort9 CovMort10 CovMort11 CovMort12
1       22       10       16       17       15        58       115       164
2        0        0        0        0        0         0         0         0
3        0        0        0        0        0         4         5         6
4        5        4        3        3        6        24        27        19
5        3        2        2        0        0         3         3         9
6        0        0        0        0        1         1         1         1
  CovMort13 CovMort14 CovMort15 CovMort16 CovMort17 CovMort18 CovMort19
1       132       105        52        27        24        19        12
2         0         0         0         0         0         0         0
3         6         4         2         2         1         1         0
4        16         9         2         8         3         1         0
5         2         4         4         0         0         1         1
6         3         1         0         1         0         1         0
  CovMort20 CovMort21 CovMort22 CovMort23 CovMort24 CovMort25 CovMort26
1        15        10        13         5         3         4        22
2         0         0         0         0         0         0         0
3         0         0         0         0         1         0         0
4         1         4         0         2         1         0         4
5         2         1         0         1         1         0         2
6         0         0         0         0         0         0         0
  CovMort27 CovMort28 CovMort29 CovMort30 CovMort31 CovMort32 CovMort33
1        34        30        35        35        23        15        16
2         0         0         0         0         0         0         0
3         1         5         5         3         2         0         2
4         9        14        21        18        24        17         6
5         2         4         5         5         6         2         3
6         0         0         0         3         0         0         1
  CovMort34 CovMort35 CovMort35.1
1         8         7          NA
2         0         0          NA
3         0         3          NA
4         8         7          NA
5         4         1          NA
6         1         0          NA

Histogram

# Split the device into one row of two panels so both histograms sit
# side by side.
par(mfrow = c(1, 2))

# Left panel: histogram with R's default title, labels and axis.
hist(data$Income)

# Right panel: same data with a custom title and x label. The default x axis
# is suppressed so that a finer one can be drawn by hand below.
hist(
  x = data$Income,
  xlab = "Income",
  main = "Distribution of Income",
  xaxt = "n"
)

# Hand-drawn x axis for the right panel: one tick every 0.1 from 0.4 to 1.4.
axis(at = seq(from = 0.4, to = 1.4, by = 0.1), side = 1)

Barplot: Not very useful

# Draw the per-row CovMort18 values as bars twice, side by side: R's defaults
# on the left, a titled and labelled version on the right.
par(mfrow = c(1, 2))
barplot(data$CovMort18)

# barplot() returns the x coordinate of each bar's midpoint. With the default
# bar width and spacing those midpoints are not at 1, 2, 3, ..., so a bare
# axis(side = 1) would print tick labels that do not line up with the row
# ("county") index. Keep the midpoints and place the ticks on them instead.
bar_mids <- barplot(
  data$CovMort18,
  main = "Covid Mortality",
  xaxt = "n",
  xlab = "County Index"
)

# Label a readable subset of bars: round index values that actually exist.
tick_idx <- pretty(seq_along(bar_mids))
tick_idx <- tick_idx[
  tick_idx %% 1 == 0 & tick_idx >= 1 & tick_idx <= length(bar_mids)
]
axis(side = 1, at = bar_mids[tick_idx], labels = tick_idx)

Barplot: Not very useful

# Frequency table: for each distinct CovMort18 value, how many rows have it.
table(data[["CovMort18"]])

  0   1   2   3   4   5   6   7  10  11  12  14  15  19  22  27  32  56 166 
 18   9   5   2   4   2   2   2   1   1   1   2   1   3   1   1   1   1   1 
# Tabulate how many counties recorded each distinct CovMort18 value, then plot
# the raw counts (left) and the matching proportions of counties (right).
mort_freq <- table(data$CovMort18)
par(mfrow = c(1, 2))

# Let barplot() draw the x axis itself: it labels every bar with the mortality
# value that bar stands for. (Suppressing it and calling axis(side = 1) would
# print numbers unrelated to those values.) las = 2 turns the labels
# perpendicular to the axis so that more of them fit.
barplot(
  mort_freq,
  main = "Mortality Counts",
  xlab = "Mortality Counts",
  ylab = "Number of Counties",
  las = 2
)

# prop.table() divides each count by the total number of tabulated counties,
# so the bar heights sum to 1. Dividing by sum(data$CovMort18) would use the
# total number of deaths as the denominator, which is not a proportion of
# counties.
barplot(
  prop.table(mort_freq),
  main = "Mortality Counts",
  xlab = "Mortality Counts",
  ylab = "Proportion of Counties",
  las = 2
)