The following dataset is obtained from here. The patients.csv contains:
The bg_reading.csv contains:
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the blood-glucose readings and the patient table.
reading <- read.csv("bg_reading.csv", header = TRUE)
patient <- read.csv("patients.csv", header = TRUE)

# Attach each reading's patient attributes with one vectorised lookup instead
# of a row-by-row loop. match() gives, for every reading, the row of `patient`
# with the same id (the first such row if ids repeat), or NA when the id is
# absent -- so an unknown patient_id yields NA rather than stopping the script
# with a "replacement has length zero" error, and an empty `reading` table is
# handled (a `1:dim(reading)[1]` loop would iterate over c(1, 0) in that case).
patient_row <- match(reading$patient_id, patient$id)
reading$age <- patient$age[patient_row]
# NOTE(review): `type` keeps whatever storage type read.csv() gave
# patient$diabetes_type. If that column is read as a factor (the default for
# text columns before R 4.0), element-wise assignment into an integer vector
# would have stored the factor's integer codes instead -- confirm which
# representation downstream code expects.
reading$type <- patient$diabetes_type[patient_row]

head(reading)
## date bg id patient_id age type
## 1 2006-08-28 85.84510 40817 28 26 1
## 2 2006-03-19 94.33209 14718 10 66 2
## 3 2005-08-28 90.36989 12761 9 51 2
## 4 2006-11-29 96.14667 57766 39 26 2
## 5 2007-09-28 75.95284 31844 22 49 2
## 6 2006-10-02 82.43022 10054 7 70 1
Here is a scatterplot of age vs. mean blood glucose reading:
# Mean glucose reading, and the number of readings behind it, for each age.
age_grouped <- summarise(
  group_by(reading, age),
  mean_bg = mean(bg),
  n = n()
)

# Scatterplot with one point per age.
ggplot(data = age_grouped, mapping = aes(x = age, y = mean_bg)) +
  geom_point()
Here is a line plot of the same data:
# Same per-age means as the scatterplot, connected by a line along the age axis.
ggplot(data = age_grouped) +
  geom_line(mapping = aes(x = age, y = mean_bg))
If we look at the number of patients that fall into each age group, we can see that only a few groups have more than one patient:
# Count how many patients were recorded at each distinct age.
table(patient[["age"]])
##
## 21 23 25 26 27 28 31 32 34 35 37 39 41 42 43 44 46 48 49 51 55 58 61 62 63
## 1 1 1 2 1 1 1 2 2 2 3 3 2 2 1 2 1 2 2 2 1 1 1 1 3
## 64 65 66 70
## 3 2 1 2
Thus, the spikes shown at ages 28, 41 and 55 are not representative of those ages in the population.
# Mean blood glucose and number of readings for every (age, diabetes type) pair.
# summarise() strips only the innermost grouping level, so ungroup() is called
# explicitly to avoid carrying a leftover grouping by age into later steps.
age_type_grouped <- reading %>%
  group_by(age, type) %>%
  summarise(mean_bg = mean(bg), n = n()) %>%
  ungroup()

# Treat the diabetes type as categorical so ggplot2 uses a discrete colour
# scale and draws one line per type.
age_type_grouped$type <- as.factor(age_type_grouped$type)

# The colour mapping is declared once and inherited by both layers.
ggplot(age_type_grouped, aes(age, mean_bg, color = type)) +
  geom_point() +
  geom_line()
It seems that type 1 patients controlled their glucose readings very well, whereas the glucose level of type 2 patients fluctuates.