rm(list = ls()) # remove all objects
## Preparation, setting wd & import csv
setwd(dir = "/Users/daktre/Documents/Official/IPH/PhD/Data/Analysis/Survey analysis/R working folder/")
tum <- read.csv("Batch Tumkur_3_11.csv", header = TRUE) #load tumkur csv file
rai <- read.csv("BatchRaichur.csv", header = TRUE) #load raichur csv file
total <- rbind(tum, rai) #merging the two data frames VERTICALLY. 2 frames must have the same variables even if they are not in the same order. Can be verified in the workspace beside
View(total) #view merged data frame and check for any problems/mismatched rows/columns
attach(total)
## Add designations categories from new csv file
desig <- read.csv("Total_designations.csv", header = TRUE)
total <- cbind(total, desig)
names(total) #verify
## [1] "id" "a1" "a2" "a3" "a4"
## [6] "a5" "a6" "b1" "b2" "b3"
## [11] "b4" "b5" "b6" "b7" "b8"
## [16] "b9" "b10" "b11" "b12" "b13"
## [21] "b14" "b15" "b16" "b17" "b18"
## [26] "b19" "b20" "c1" "c2" "c3"
## [31] "c4" "c5" "c6" "c7" "c8"
## [36] "c9" "c10" "c11" "c12" "c13"
## [41] "c14" "c15" "c16" "c17" "c18"
## [46] "d1" "d2" "d3" "d4" "d5"
## [51] "d6" "d7" "d8" "d9" "d10"
## [56] "e1" "e2" "e3" "e4" "e51"
## [61] "e52" "e53" "e54" "e55" "e56"
## [66] "e57" "e58" "e59" "e60" "e61"
## [71] "e62" "e63" "e64" "e65" "e66"
## [76] "e6" "e7" "e8" "e9" "e10"
## [81] "f1" "f2" "f3" "f4" "f5"
## [86] "f6" "f7" "f8" "f9" "f10a"
## [91] "f10b" "f10bb" "f10c" "f10cc" "f11a"
## [96] "f11b" "f11c" "f11d" "f11e" "f11f"
## [101] "f11ff" "f11g" "f12" "f13a" "f13b"
## [106] "f13bb" "f13c" "f13cc" "f14a" "f14b"
## [111] "f14c" "f14d" "f14e" "f14f" "f14ff"
## [116] "f14g" "g1" "g2" "g3" "g4"
## [121] "g5" "g6" "g7" "g8" "g9"
## [126] "g10" "g11" "g12" "g13" "g14"
## [131] "h1" "h2" "h3" "h4a" "h4b"
## [136] "h4c" "h4d" "h4e" "h4f" "h5"
## [141] "h6" "h7" "h8" "h9" "h10"
## [146] "desig_cat"
## Subsetting
tumkur <- total[c(1:65), ]
raichur <- total[c(66:92), ]
## Describe respondents
tumres <- length(which(total$a1 < 200)) #Number of respondents in Tumkur
raires <- length(which(total$a1 > 199)) #Number of respondents in Raichur
rr_tum <- tumres * 100/69 #69 health managers in Tumkur. Calculate response rate for survey in Tumkur
rr_rai <- raires * 100/36 #36 health managers in Raichur Calculate response rate for survey in Raichur
table(total$h1) #Table with male and female respondents
##
## 1 2
## 71 21
male_perc <- length(which(total$h1 == TRUE)) * 100/length(total$h1) #Percentage of male respondents in both districts
male_perc_tum <- length(which(tumkur$h1 == TRUE)) * 100/length(tumkur$h1) # Percentage of male respondents in Tumkur
male_perc_rai <- length(which(raichur$h1 == TRUE)) * 100/length(raichur$h1) # Percentage of male respondents in Raichur
## Format age as dd/mm/yyyy and add as h22
h2_yearonly <- as.numeric(format(as.Date(h2, format = "%d/%m/%y"), format = "%y")) #convert vector of dates in dd/mm/yyyy format into a numeric vector of birth years
h22 <- 2012 - (1900 + h2_yearonly) #Get age by subtracting birth year from 2012. But, add 1900 to the vector to get birth year cos the survey used yy instead of yyyy
View(h22)
## h22 needs to be cleaned cos some values >100 as illogical date was
## entered instead of empty values
h22clearn <- replace(h22, c(which(h22 > 100)), c(NA, NA))
View(h22clearn)
which(h22clearn > 100) #Test again
## integer(0)
which(h22clearn == NA) #Test again. This one fails. NA is ignored?
## integer(0)
# Add h22clearn to the total dataframe
total <- cbind(total, h22clearn)
names(total) #Test all variables
## [1] "id" "a1" "a2" "a3" "a4"
## [6] "a5" "a6" "b1" "b2" "b3"
## [11] "b4" "b5" "b6" "b7" "b8"
## [16] "b9" "b10" "b11" "b12" "b13"
## [21] "b14" "b15" "b16" "b17" "b18"
## [26] "b19" "b20" "c1" "c2" "c3"
## [31] "c4" "c5" "c6" "c7" "c8"
## [36] "c9" "c10" "c11" "c12" "c13"
## [41] "c14" "c15" "c16" "c17" "c18"
## [46] "d1" "d2" "d3" "d4" "d5"
## [51] "d6" "d7" "d8" "d9" "d10"
## [56] "e1" "e2" "e3" "e4" "e51"
## [61] "e52" "e53" "e54" "e55" "e56"
## [66] "e57" "e58" "e59" "e60" "e61"
## [71] "e62" "e63" "e64" "e65" "e66"
## [76] "e6" "e7" "e8" "e9" "e10"
## [81] "f1" "f2" "f3" "f4" "f5"
## [86] "f6" "f7" "f8" "f9" "f10a"
## [91] "f10b" "f10bb" "f10c" "f10cc" "f11a"
## [96] "f11b" "f11c" "f11d" "f11e" "f11f"
## [101] "f11ff" "f11g" "f12" "f13a" "f13b"
## [106] "f13bb" "f13c" "f13cc" "f14a" "f14b"
## [111] "f14c" "f14d" "f14e" "f14f" "f14ff"
## [116] "f14g" "g1" "g2" "g3" "g4"
## [121] "g5" "g6" "g7" "g8" "g9"
## [126] "g10" "g11" "g12" "g13" "g14"
## [131] "h1" "h2" "h3" "h4a" "h4b"
## [136] "h4c" "h4d" "h4e" "h4f" "h5"
## [141] "h6" "h7" "h8" "h9" "h10"
## [146] "desig_cat" "h22clearn"
attach(total) #attach new updated data frame, total
## The following object is masked _by_ .GlobalEnv:
##
## h22clearn
## The following objects are masked from total (position 3):
##
## a1, a2, a3, a4, a5, a6, b1, b10, b11, b12, b13, b14, b15, b16,
## b17, b18, b19, b2, b20, b3, b4, b5, b6, b7, b8, b9, c1, c10,
## c11, c12, c13, c14, c15, c16, c17, c18, c2, c3, c4, c5, c6,
## c7, c8, c9, d1, d10, d2, d3, d4, d5, d6, d7, d8, d9, e1, e10,
## e2, e3, e4, e51, e52, e53, e54, e55, e56, e57, e58, e59, e6,
## e60, e61, e62, e63, e64, e65, e66, e7, e8, e9, f1, f10a, f10b,
## f10bb, f10c, f10cc, f11a, f11b, f11c, f11d, f11e, f11f, f11ff,
## f11g, f12, f13a, f13b, f13bb, f13c, f13cc, f14a, f14b, f14c,
## f14d, f14e, f14f, f14ff, f14g, f2, f3, f4, f5, f6, f7, f8, f9,
## g1, g10, g11, g12, g13, g14, g2, g3, g4, g5, g6, g7, g8, g9,
## h1, h10, h2, h3, h4a, h4b, h4c, h4d, h4e, h4f, h5, h6, h7, h8,
## h9, id
# Age
mean(h22clearn, na.rm = TRUE)
## [1] 48.5
median(h22clearn, na.rm = TRUE)
## [1] 51
names(tumkur)
## [1] "id" "a1" "a2" "a3" "a4"
## [6] "a5" "a6" "b1" "b2" "b3"
## [11] "b4" "b5" "b6" "b7" "b8"
## [16] "b9" "b10" "b11" "b12" "b13"
## [21] "b14" "b15" "b16" "b17" "b18"
## [26] "b19" "b20" "c1" "c2" "c3"
## [31] "c4" "c5" "c6" "c7" "c8"
## [36] "c9" "c10" "c11" "c12" "c13"
## [41] "c14" "c15" "c16" "c17" "c18"
## [46] "d1" "d2" "d3" "d4" "d5"
## [51] "d6" "d7" "d8" "d9" "d10"
## [56] "e1" "e2" "e3" "e4" "e51"
## [61] "e52" "e53" "e54" "e55" "e56"
## [66] "e57" "e58" "e59" "e60" "e61"
## [71] "e62" "e63" "e64" "e65" "e66"
## [76] "e6" "e7" "e8" "e9" "e10"
## [81] "f1" "f2" "f3" "f4" "f5"
## [86] "f6" "f7" "f8" "f9" "f10a"
## [91] "f10b" "f10bb" "f10c" "f10cc" "f11a"
## [96] "f11b" "f11c" "f11d" "f11e" "f11f"
## [101] "f11ff" "f11g" "f12" "f13a" "f13b"
## [106] "f13bb" "f13c" "f13cc" "f14a" "f14b"
## [111] "f14c" "f14d" "f14e" "f14f" "f14ff"
## [116] "f14g" "g1" "g2" "g3" "g4"
## [121] "g5" "g6" "g7" "g8" "g9"
## [126] "g10" "g11" "g12" "g13" "g14"
## [131] "h1" "h2" "h3" "h4a" "h4b"
## [136] "h4c" "h4d" "h4e" "h4f" "h5"
## [141] "h6" "h7" "h8" "h9" "h10"
## [146] "desig_cat"
# re-run subsetting to include the new column
tumkur <- total[c(1:65), ]
raichur <- total[c(66:92), ]
names(tumkur) #verify
## [1] "id" "a1" "a2" "a3" "a4"
## [6] "a5" "a6" "b1" "b2" "b3"
## [11] "b4" "b5" "b6" "b7" "b8"
## [16] "b9" "b10" "b11" "b12" "b13"
## [21] "b14" "b15" "b16" "b17" "b18"
## [26] "b19" "b20" "c1" "c2" "c3"
## [31] "c4" "c5" "c6" "c7" "c8"
## [36] "c9" "c10" "c11" "c12" "c13"
## [41] "c14" "c15" "c16" "c17" "c18"
## [46] "d1" "d2" "d3" "d4" "d5"
## [51] "d6" "d7" "d8" "d9" "d10"
## [56] "e1" "e2" "e3" "e4" "e51"
## [61] "e52" "e53" "e54" "e55" "e56"
## [66] "e57" "e58" "e59" "e60" "e61"
## [71] "e62" "e63" "e64" "e65" "e66"
## [76] "e6" "e7" "e8" "e9" "e10"
## [81] "f1" "f2" "f3" "f4" "f5"
## [86] "f6" "f7" "f8" "f9" "f10a"
## [91] "f10b" "f10bb" "f10c" "f10cc" "f11a"
## [96] "f11b" "f11c" "f11d" "f11e" "f11f"
## [101] "f11ff" "f11g" "f12" "f13a" "f13b"
## [106] "f13bb" "f13c" "f13cc" "f14a" "f14b"
## [111] "f14c" "f14d" "f14e" "f14f" "f14ff"
## [116] "f14g" "g1" "g2" "g3" "g4"
## [121] "g5" "g6" "g7" "g8" "g9"
## [126] "g10" "g11" "g12" "g13" "g14"
## [131] "h1" "h2" "h3" "h4a" "h4b"
## [136] "h4c" "h4d" "h4e" "h4f" "h5"
## [141] "h6" "h7" "h8" "h9" "h10"
## [146] "desig_cat" "h22clearn"
mean(tumkur$h22clearn, na.rm = TRUE)
## [1] 48.67
mean(raichur$h22clearn, na.rm = TRUE)
## [1] 48.11
## Subsetting by designation
View(e3)
# Age histograms
hist(h22clearn, breaks = 5, col = "gray", border = "black", main = "Age distribution among survey respondents",
xlab = "Age", ylab = "Number of health managers")
hist(tumkur$h22clearn, breaks = 5, col = "gray", border = "black", main = "Age distribution among survey respondents in Tumkur",
xlab = "Age", ylab = "Number of health managers")
hist(raichur$h22clearn, breaks = 5, col = "gray", border = "black", main = "Age distribution among survey respondents in Raichur",
xlab = "Age", ylab = "Number of health managers")
agelessthan40 <- c(length(c(which(h22clearn < 40))), length(c(which(h22clearn >
40))))
agelessthan40
## [1] 16 73
percentabove40 <- agelessthan40[2] * 100/length(h22clearn)
percentabove40
## [1] 79.35
# schooling
View(h3)
barplot(table(h3), main = "Health managers' educational background in Raichur & Tumkur",
names.arg = c("Rural", "Semi-rural", "Semi-urban", "Non-bangalore cities",
"Bangalore"), border = TRUE, ylab = "number of health managers") #Explore bar plot
h33 <- as.data.frame(table(h3)) # Make the table as a new data frame
View(h33) #Verify
h33$Freq #Verify that this is a data frame. If it were a matrix, it would have said this cannot be done on atomic vector
## [1] 37 30 9 13 3
highschool <- c("Rural", "Semi-urban", "Urban") #Decide logical categories. Shrink to these three.
hsnumber <- c(h33$Freq[1] + h33$Freq[2], h33$Freq[3], h33$Freq[4] + h33$Freq[5]) #Shrink freqquency values to these same categories
hsnumber #Verify vector
## [1] 67 9 16
Schooling <- data.frame(Hsattend = highschool, Number = hsnumber) #Make new data frame
barplot(Schooling$Number, names.arg = highschool, main = "Raichur & Tumkur health managers' characteristics by high-school attendance",
border = TRUE, xlab = "High school attendance", ylab = "Number")
prop.table(Schooling$Number)
## [1] 0.72826 0.09783 0.17391
# Educational qualifications
names(total) #To verify the varibale names
## [1] "id" "a1" "a2" "a3" "a4"
## [6] "a5" "a6" "b1" "b2" "b3"
## [11] "b4" "b5" "b6" "b7" "b8"
## [16] "b9" "b10" "b11" "b12" "b13"
## [21] "b14" "b15" "b16" "b17" "b18"
## [26] "b19" "b20" "c1" "c2" "c3"
## [31] "c4" "c5" "c6" "c7" "c8"
## [36] "c9" "c10" "c11" "c12" "c13"
## [41] "c14" "c15" "c16" "c17" "c18"
## [46] "d1" "d2" "d3" "d4" "d5"
## [51] "d6" "d7" "d8" "d9" "d10"
## [56] "e1" "e2" "e3" "e4" "e51"
## [61] "e52" "e53" "e54" "e55" "e56"
## [66] "e57" "e58" "e59" "e60" "e61"
## [71] "e62" "e63" "e64" "e65" "e66"
## [76] "e6" "e7" "e8" "e9" "e10"
## [81] "f1" "f2" "f3" "f4" "f5"
## [86] "f6" "f7" "f8" "f9" "f10a"
## [91] "f10b" "f10bb" "f10c" "f10cc" "f11a"
## [96] "f11b" "f11c" "f11d" "f11e" "f11f"
## [101] "f11ff" "f11g" "f12" "f13a" "f13b"
## [106] "f13bb" "f13c" "f13cc" "f14a" "f14b"
## [111] "f14c" "f14d" "f14e" "f14f" "f14ff"
## [116] "f14g" "g1" "g2" "g3" "g4"
## [121] "g5" "g6" "g7" "g8" "g9"
## [126] "g10" "g11" "g12" "g13" "g14"
## [131] "h1" "h2" "h3" "h4a" "h4b"
## [136] "h4c" "h4d" "h4e" "h4f" "h5"
## [141] "h6" "h7" "h8" "h9" "h10"
## [146] "desig_cat" "h22clearn"
h4ab <- c(h4a + h4b) #combine UG and PG docs. 1 is UG;2 is PG
h4ee <- c(h4e + h4f)
h4ee[which(h4ee > 1)] <- 1 #replace all >1 values with 1 as the last 2 cats can be merged while presenting results
h4ad <- c(which(h4d[which(h4a == 1)] == 1)) #which medical graduates also had management degree
h4bd <- c(which(h4d[which(h4b == 1)] == 1)) #which medical pgs also had management degree
h4ad
## [1] 6 26
h4bd
## [1] 6 21
edu <- c("Med grad", "Med Sp'list", "Nursing", "M'gmt", "Other grad")
edutot <- c(sum(h4a), abs(sum(h4b - h4a)), sum(h4c), sum(h4d), abs(sum(h4ee -
h4d)))
prop.table(edutot)
## [1] 0.47674 0.11628 0.03488 0.20930 0.16279
barplot(edutot, names.arg = edu, main = "Educational qualifications of health managers (Raichur & Tumkur)",
border = TRUE, xlab = "Educational Qualifications", ylab = "Number")
## Medical college
mcol <- as.data.frame(table(h5))
mcol_leg <- c("Non med.", "Govt.", "Pvt.")
barplot(mcol$Freq, names.arg = mcol_leg, main = "Medical education of health managers in Raichur & Tumkur",
border = TRUE, ylab = "Number")
## 4 barplots in 1 figure
par(mfrow = c(2, 2))
barplot(mcol$Freq, width = 0.15, names.arg = mcol_leg, main = "Medical education of health managers in \nRaichur & Tumkur",
border = TRUE, ylab = "Number", xlim = c(0, 1))
barplot(edutot, names.arg = edu, width = 0.15, main = "Educational qualifications of health managers in \nRaichur & Tumkur",
border = TRUE, xlab = "Educational Qualifications", ylab = "Number", xlim = c(0,
1))
hist(h22clearn, breaks = 5, col = "gray", border = "black", main = "Age distribution of health managers",
xlab = "Age", ylab = "Number of health managers")
barplot(Schooling$Number, width = 0.15, names.arg = highschool, main = "Raichur & Tumkur health managers' characteristics \nby high-school attendance",
border = TRUE, xlab = "High school attendance", ylab = "Number", xlim = c(0,
1))
## Years in service
h66 <- 2013 - h6 #Make separate vector with years of service
h66 #verify
## [1] 27 39 28 25 29 28 26 24 26 31 37 17 29 7 33 22 38 34 36 16 31 40 16
## [24] 24 31 16 31 7 16 37 19 38 29 37 27 30 31 5 5 17 37 16 11 13 5 37
## [47] 37 4 4 4 3 5 5 36 32 41 32 20 22 32 34 16 17 38 28 33 7 21 17
## [70] 4 18 18 18 41 5 37 35 4 26 43 27 34 34 8 5 38 4 21 36 18 27 21
total <- cbind(total, h66) #add this column to the data frame
names(total) #verify
## [1] "id" "a1" "a2" "a3" "a4"
## [6] "a5" "a6" "b1" "b2" "b3"
## [11] "b4" "b5" "b6" "b7" "b8"
## [16] "b9" "b10" "b11" "b12" "b13"
## [21] "b14" "b15" "b16" "b17" "b18"
## [26] "b19" "b20" "c1" "c2" "c3"
## [31] "c4" "c5" "c6" "c7" "c8"
## [36] "c9" "c10" "c11" "c12" "c13"
## [41] "c14" "c15" "c16" "c17" "c18"
## [46] "d1" "d2" "d3" "d4" "d5"
## [51] "d6" "d7" "d8" "d9" "d10"
## [56] "e1" "e2" "e3" "e4" "e51"
## [61] "e52" "e53" "e54" "e55" "e56"
## [66] "e57" "e58" "e59" "e60" "e61"
## [71] "e62" "e63" "e64" "e65" "e66"
## [76] "e6" "e7" "e8" "e9" "e10"
## [81] "f1" "f2" "f3" "f4" "f5"
## [86] "f6" "f7" "f8" "f9" "f10a"
## [91] "f10b" "f10bb" "f10c" "f10cc" "f11a"
## [96] "f11b" "f11c" "f11d" "f11e" "f11f"
## [101] "f11ff" "f11g" "f12" "f13a" "f13b"
## [106] "f13bb" "f13c" "f13cc" "f14a" "f14b"
## [111] "f14c" "f14d" "f14e" "f14f" "f14ff"
## [116] "f14g" "g1" "g2" "g3" "g4"
## [121] "g5" "g6" "g7" "g8" "g9"
## [126] "g10" "g11" "g12" "g13" "g14"
## [131] "h1" "h2" "h3" "h4a" "h4b"
## [136] "h4c" "h4d" "h4e" "h4f" "h5"
## [141] "h6" "h7" "h8" "h9" "h10"
## [146] "desig_cat" "h22clearn" "h66"
summary(h66)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.0 16.0 26.0 23.6 34.0 43.0
boxplot(h66[which(desig == "dhm")])
## Violin plots Tryiung violinplots for this. This is chumma done. No
## point really to have 'kernel density ploting' for this! Violin plot
## for years of experience by designation
library(vioplot)
## Loading required package: sm
## Package `sm', version 2.2-4.1 Copyright (C) 1997, 2000, 2005, 2007, 2008,
## A.W.Bowman & A.Azzalini Type help(sm) for summary information
viol1 <- h66[desig == "dhm"]
viol2 <- h66[desig == "thm"]
viol3 <- h66[desig == "thom"]
vioplot(viol1, viol2, viol3, names = c("Dt. hm", "Taluka hm", "Taluka hosp manager"),
col = "gray")
title("Violin plots of years of experience of health managers")
# Violin plots for age by designation
library(vioplot)
viol1_age <- h22clearn[desig == "dhm"]
viol2_age <- h22clearn[desig == "thm"]
viol3_age <- h22clearn[desig == "thom"]
viol2_age[1] <- as.integer(mean(viol2_age, na.rm = TRUE)) #Assume mean age for 2 NA of THOs
viol2_age[35] <- as.integer(mean(viol2_age, na.rm = TRUE)) #Assume mean age for 2 NA of THOs
vioplot(viol1_age, viol2_age, viol3_age, names = c("Dt. hm", "Taluka hm", "Taluka hosp manager"),
col = "gray")
title("Violin plots of age of health managers")
####### Survey responses
## Module B
# B1
par(mfrow = c(1, 1)) #1 graph per page
b1_resp <- as.data.frame(table(b1)) #make data frome of table
barplot(b1_resp$Freq, names.arg = c("NA", "Evaluate", "Dt/State", "Planning",
"Assesment"), border = TRUE, main = "Health managers perception on \nrole of plans under NRHM",
xlab = "Responses", ylab = "Number")
prop.table(b1_resp$Freq)
## [1] 0.03261 0.04348 0.19565 0.64130 0.08696
# grouped bar plot to compare responses between Tumkur and Raichur. THIS
# ONE ABORTED. DONT RUN. KEPT FOR THE RECORD!!!
# tumkurb1<-as.data.frame(table(tumkur$b1))
# raichurb1<-as.data.frame(table(raichur$b1))
# b1compare<-table(tumkurb1$Freq, raichurb1$Freq) barplot(b1compare,
# names.arg=c('Evaluate','Dt/State','Planning','Assesment'), border=TRUE,
# main='Perception on NRHM planning among \nhealth managers in Tumkur
# compared to Raichur', legend=names(b1compare), beside=TRUE)
# B2
table(b2)
## b2
## 1 2 3 4 5
## 2 11 9 35 35
b2_resp <- as.data.frame(table(b2)) #make data frome of table
barplot(b2_resp$Freq, names.arg = c("State", "Dt.", "Taluka", "PHC", "VHSC"),
border = TRUE, main = "Most peripheral level for planning", xlab = "Responses",
ylab = "Number")
# B2 stacked bar
b2_resp_tum <- as.data.frame(table(tumkur$b2)) #make data frome of table
b2_resp_rai <- as.data.frame(table(raichur$b2)) #make data frome of table
b2compare <- table(b2_resp_tum$Freq, b2_resp_rai$Freq)
barplot(b2compare, names.arg = c("State", "Dt.", "Taluka", "PHC", "VHSC"), border = TRUE,
main = "Most peripheral level for planning", xlab = "Responses", ylab = "Number")
# Stacked bar trials using qplot/ggplot
library(ggplot2)
qplot(factor(b2), data = total, geom = "bar", fill = factor(desig_cat)) #Replace b4 & e1 in this line for stacked barplots
qplot(b1, b2, data = total)