I found some data on migraine headaches and thought this was an opportunity to try Lubridate. The data include a Start and End date and a Severity of the pain on a scale of 1 to 5.

Here are the packages we will use.

library(ggplot2)
library(tidyverse)
library(lubridate)
library(Jmisc)

Read in the data

# Load the raw headache log from the working directory.
Pain <- read.csv(file = "HeadacheData.csv")

Convert it to a Tibble df for the sake of Tidyverse.

# Convert the data frame to a tibble. as_tibble() is the supported spelling;
# the dotted alias as.tibble() is deprecated in the tibble package.
Pain <- as_tibble(Pain)

We have to convert Date and End to data type Date.

# Inspect the column types before conversion.
str(Pain)
## Classes 'tbl_df', 'tbl' and 'data.frame':    26 obs. of  7 variables:
##  $ Date     : Factor w/ 26 levels "1/20/2017","10/10/2018",..: 12 17 16 21 23 26 3 6 5 8 ...
##  $ End      : Factor w/ 25 levels "1/25/2017","10/17/2018",..: 13 16 16 20 21 23 3 4 8 7 ...
##  $ Character: Factor w/ 1 level "Actual": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Severity : int  3 5 5 4 4 3 3 3 3 3 ...
##  $ X        : logi  NA NA NA NA NA NA ...
##  $ X.1      : logi  NA NA NA NA NA NA ...
##  $ X.2      : logi  NA NA NA NA NA NA ...
# Parse both month/day/year columns into Date objects in a single step.
Pain <- Pain %>%
  mutate(
    Date = as.Date(Date, format = "%m/%d/%Y"),
    End = as.Date(End, format = "%m/%d/%Y")
  )
# Confirm that Date and End are now of class Date.
str(Pain)
## Classes 'tbl_df', 'tbl' and 'data.frame':    26 obs. of  7 variables:
##  $ Date     : Date, format: "2018-03-27" "2018-05-04" ...
##  $ End      : Date, format: "2018-04-02" "2018-06-02" ...
##  $ Character: Factor w/ 1 level "Actual": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Severity : int  3 5 5 4 4 3 3 3 3 3 ...
##  $ X        : logi  NA NA NA NA NA NA ...
##  $ X.1      : logi  NA NA NA NA NA NA ...
##  $ X.2      : logi  NA NA NA NA NA NA ...

Sort the rows by Date. Strictly speaking, only one of order() and arrange() is necessary: either one sorts the data on its own.

# Sort the headaches chronologically by start date.
# arrange() alone is sufficient; its result must be assigned back to Pain,
# and columns are referred to by bare name inside dplyr verbs.
Pain <- Pain %>%
  arrange(Date)
# Print the sorted tibble.
Pain
## # A tibble: 26 x 7
##    Date       End        Character Severity X     X.1   X.2  
##    <date>     <date>     <fct>        <int> <lgl> <lgl> <lgl>
##  1 2016-10-03 2016-10-09 Actual           3 NA    NA    NA   
##  2 2016-11-07 2016-11-12 Actual           3 NA    NA    NA   
##  3 2016-11-28 2016-12-03 Actual           3 NA    NA    NA   
##  4 2016-12-20 2016-12-25 Actual           3 NA    NA    NA   
##  5 2017-01-20 2017-01-25 Actual           3 NA    NA    NA   
##  6 2017-02-13 2017-02-17 Actual           3 NA    NA    NA   
##  7 2017-03-10 2017-03-16 Actual           3 NA    NA    NA   
##  8 2017-04-17 2017-04-23 Actual           3 NA    NA    NA   
##  9 2017-05-20 2017-06-06 Actual           3 NA    NA    NA   
## 10 2017-06-26 2017-07-02 Actual           3 NA    NA    NA   
## # ... with 16 more rows

Now everything is in Date order, but the data contains extraneous columns (X, X.1 and X.2, all NA; they were already present when the CSV was read, as the first str() output shows). Let's extract the ones we are interested in.

# Preview only the columns of interest. The result is printed, not stored,
# so Pain itself still contains every column after this call.
select(Pain, Date, End, Character, Severity)

Alright, the data is formatted. Let's plot.

# Scatter plot of severity against start date, drawn with hollow circles.
A <- Pain %>%
  ggplot(mapping = aes(x = Date, y = Severity)) +
  geom_point(shape = 1)
A

Let's make the size of the points a function of the Severity.

# Same scatter plot, with point size mapped to Severity.
B <- Pain %>%
  ggplot(mapping = aes(x = Date, y = Severity)) +
  geom_point(mapping = aes(size = Severity))
B

Not enough contrast between the point sizes. Let's exaggerate the differences by raising Severity to a power (4.25).

# Add Raised_Severity = Severity^4.25 to spread the point sizes apart.
Pain <- mutate(Pain, Raised_Severity = Severity^4.25)
# Show the first three rows to confirm the new column exists.
head(Pain, n = 3)
## # A tibble: 3 x 8
##   Date       End        Character Severity X     X.1   X.2  
##   <date>     <date>     <fct>        <int> <lgl> <lgl> <lgl>
## 1 2016-10-03 2016-10-09 Actual           3 NA    NA    NA   
## 2 2016-11-07 2016-11-12 Actual           3 NA    NA    NA   
## 3 2016-11-28 2016-12-03 Actual           3 NA    NA    NA   
## # ... with 1 more variable: Raised_Severity <dbl>

Sure enough, there is the new column that we were after, but the extraneous columns are still there too, because the result of the earlier select() was never assigned back to Pain. We will drop them.

# Display the wanted columns only. The selection is printed but not assigned,
# so the head() call below still shows the full set of columns in Pain.
select(Pain, Date, End, Character, Severity, Raised_Severity)
head(Pain, n = 3)
## # A tibble: 3 x 8
##   Date       End        Character Severity X     X.1   X.2  
##   <date>     <date>     <fct>        <int> <lgl> <lgl> <lgl>
## 1 2016-10-03 2016-10-09 Actual           3 NA    NA    NA   
## 2 2016-11-07 2016-11-12 Actual           3 NA    NA    NA   
## 3 2016-11-28 2016-12-03 Actual           3 NA    NA    NA   
## # ... with 1 more variable: Raised_Severity <dbl>

Again, let us clear out the extraneous columns, this time assigning the result back to Pain so that the change sticks.

# Drop the three all-NA columns and store the result back in Pain.
Pain <- select(Pain, -X, -X.1, -X.2)
# Print the cleaned tibble.
Pain
## # A tibble: 26 x 5
##    Date       End        Character Severity Raised_Severity
##    <date>     <date>     <fct>        <int>           <dbl>
##  1 2016-10-03 2016-10-09 Actual           3            107.
##  2 2016-11-07 2016-11-12 Actual           3            107.
##  3 2016-11-28 2016-12-03 Actual           3            107.
##  4 2016-12-20 2016-12-25 Actual           3            107.
##  5 2017-01-20 2017-01-25 Actual           3            107.
##  6 2017-02-13 2017-02-17 Actual           3            107.
##  7 2017-03-10 2017-03-16 Actual           3            107.
##  8 2017-04-17 2017-04-23 Actual           3            107.
##  9 2017-05-20 2017-06-06 Actual           3            107.
## 10 2017-06-26 2017-07-02 Actual           3            107.
## # ... with 16 more rows

Ok let us try replotting with our augmented Severity.

# Replot using the raised severity for both the y position and the point size.
C <- Pain %>%
  ggplot(mapping = aes(x = Date, y = Raised_Severity)) +
  geom_point(mapping = aes(size = Raised_Severity))
C

Let's change the points to red.

# Raised-severity scatter plot with red points.
# The colour is a fixed value, so it is set outside aes(). Inside aes() the
# string "Red" is treated as a data value, which yields ggplot2's default hue
# (not red) and adds a meaningless colour legend.
D <- ggplot(Pain, aes(x = Date, y = Raised_Severity)) +
  geom_point(aes(size = Raised_Severity), colour = "red")
D

Now let's change the background for more contrast.

# Black panel with a thin light-blue outline; panel border and grid lines removed.
E <- D +
  theme(
    panel.background = element_rect(
      fill = "black",
      colour = "lightblue",
      size = 0.5,
      linetype = "solid"
    ),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
E

We don't need the legends.

# Hide all legends.
# NOTE(review): the name `F` is also base R's shorthand for FALSE; this
# assignment shadows it in the global environment.
F <- E + theme(legend.position = "none")
F

The bouncing up and down of the points contributes nothing. Let's put the points on a straight line. We will add a column filled with a constant number.

# Add a constant column so every point can be drawn at the same height.
# mutate() does this directly and keeps Pain a tibble.
Pain <- Pain %>%
  mutate(Constant = 3)
Pain
# Points on a horizontal line, sized by raised severity.
# The fixed colour is set outside aes() so the points are actually red and no
# colour legend is generated.
G <- ggplot(Pain, aes(x = Date, y = Constant)) +
  geom_point(aes(size = Raised_Severity), colour = "red")
G

# Apply the dark look to G: black panel, no border, no grid lines, no legend.
H <- G +
  theme(
    panel.background = element_rect(fill = "black"),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
H

Let us drop the Y axis.

# Remove the y-axis title, tick labels and tick marks.
I <- H +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
I

And add a title

# Add the plot title to the finished timeline.
J <- I + ggtitle(label = "Headaches: Frequency and Severity")
J

There is a lot of info contained in these little dots. The point's location tells us when the headache occurred, the distance between the points tells us the time between occurrences, and the size of the point tells us the relative severity.

The first thing we see is that things are getting worse. The time between is shrinking and the Severity is climbing.

Let's get more specific and look at the length of the headaches. Here we will use Lubridate to subtract one column's dates (Date, the start of the headache) from another's (End).

# Duration of each headache: End minus Date yields a difftime in days.
# This column must exist before plotting; without it `length` in aes() would
# resolve to the base function of the same name.
Pain <- Pain %>%
  mutate(length = End - Date)

# Bar chart of headache duration over time on the dark theme.
# Colour and fill are fixed values, so they are set outside aes() to get red
# bars rather than ggplot2's default hue.
K <- ggplot(Pain, aes(x = Date, y = length)) +
  geom_bar(stat = "identity", colour = "red", fill = "red") +
  ggtitle("Length of the Headache") +
  labs(y = "Number of Days Headache Lasted") +
  theme(
    panel.background = element_rect(fill = "black"),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )

K
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.

# Line chart of severity over time on the dark theme.
# The line colour is a fixed value, so it is set outside aes(); mapping the
# string "Red" inside aes() would produce the default hue instead of red.
M <- ggplot(Pain, aes(x = Date, y = Severity)) +
  geom_line(colour = "red", size = 2) +
  theme(
    panel.background = element_rect(fill = "black"),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  ) +
  ggtitle("Severity of  Headaches")
M

bar chart showing severity

bar chart showing time between headaches

https://stackoverflow.com/questions/34333685/calulcate-running-difference-of-time-using-difftime-on-one-column-of-timestamps

Above we found the difference in time between two columns; now let us see what is involved in finding the difference within a single column of times.

Pain <- Pain %>% mutate(Difference = c(NA, difftime(Pain$Date[-1], Pain$Date[-nrow(Pain)], units = "days")))

# Days elapsed since the previous headache started (NA for the first row).
# Assumes Pain is already sorted by Date. dplyr::lag() shifts the column by
# one row, so columns can be referenced by bare name inside mutate() instead
# of reaching back into Pain$Date. as.numeric() stores plain day counts.
Pain <- Pain %>%
  mutate(
    Difference = as.numeric(
      difftime(Date, dplyr::lag(Date), units = "days")
    )
  )
head(Pain, 3)
##         Date        End Character Severity Raised_Severity Constant length
## 1 2016-10-03 2016-10-09    Actual        3         106.602        3 6 days
## 2 2016-11-07 2016-11-12    Actual        3         106.602        3 5 days
## 3 2016-11-28 2016-12-03    Actual        3         106.602        3 5 days
##   Difference
## 1         NA
## 2         35
## 3         21

This time the errant columns do not make a return (they were dropped above), so the clean-up below is left commented out.

#Pain<- Pain%>%
# select(-c(X,X.1,X.2))

Let's replace that NA with a 0.

# Substitute 0 for the missing Difference value, then print the table.
Pain$Difference <- replace_na(Pain$Difference, 0)
Pain
##          Date        End Character Severity Raised_Severity Constant
## 1  2016-10-03 2016-10-09    Actual        3        106.6020        3
## 2  2016-11-07 2016-11-12    Actual        3        106.6020        3
## 3  2016-11-28 2016-12-03    Actual        3        106.6020        3
## 4  2016-12-20 2016-12-25    Actual        3        106.6020        3
## 5  2017-01-20 2017-01-25    Actual        3        106.6020        3
## 6  2017-02-13 2017-02-17    Actual        3        106.6020        3
## 7  2017-03-10 2017-03-16    Actual        3        106.6020        3
## 8  2017-04-17 2017-04-23    Actual        3        106.6020        3
## 9  2017-05-20 2017-06-06    Actual        3        106.6020        3
## 10 2017-06-26 2017-07-02    Actual        3        106.6020        3
## 11 2017-08-01 2017-08-07    Actual        3        106.6020        3
## 12 2017-09-24 2017-09-30    Actual        3        106.6020        3
## 13 2017-11-24 2017-11-29    Actual        3        106.6020        3
## 14 2017-12-19 2017-12-24    Actual        3        106.6020        3
## 15 2018-02-11 2018-02-15    Actual        3        106.6020        3
## 16 2018-03-03 2018-03-09    Actual        3        106.6020        3
## 17 2018-03-27 2018-04-02    Actual        3        106.6020        3
## 18 2018-05-04 2018-06-02    Actual        5        934.5930        3
## 19 2018-05-09 2018-05-16    Actual        1          1.0000        3
## 20 2018-05-28 2018-06-02    Actual        5        934.5930        3
## 21 2018-06-18 2018-06-24    Actual        4        362.0387        3
## 22 2018-07-17 2018-07-22    Actual        4        362.0387        3
## 23 2018-08-12 2018-08-16    Actual        4        362.0387        3
## 24 2018-09-06 2018-09-12    Actual        3        106.6020        3
## 25 2018-09-12 2018-09-19    Actual        3        106.6020        3
## 26 2018-10-10 2018-10-17    Actual        3        106.6020        3
##     length Difference
## 1   6 days          0
## 2   5 days         35
## 3   5 days         21
## 4   5 days         22
## 5   5 days         31
## 6   4 days         24
## 7   6 days         25
## 8   6 days         38
## 9  17 days         33
## 10  6 days         37
## 11  6 days         36
## 12  6 days         54
## 13  5 days         61
## 14  5 days         25
## 15  4 days         54
## 16  6 days         20
## 17  6 days         24
## 18 29 days         38
## 19  7 days          5
## 20  5 days         19
## 21  6 days         21
## 22  5 days         29
## 23  4 days         26
## 24  6 days         25
## 25  7 days          6
## 26  7 days         28
# Coerce Difference to a plain numeric vector and re-check the structure.
Pain[["Difference"]] <- as.numeric(Pain[["Difference"]])
str(Pain)
## 'data.frame':    26 obs. of  8 variables:
##  $ Date           : Date, format: "2016-10-03" "2016-11-07" ...
##  $ End            : Date, format: "2016-10-09" "2016-11-12" ...
##  $ Character      : Factor w/ 1 level "Actual": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Severity       : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ Raised_Severity: num  107 107 107 107 107 ...
##  $ Constant       : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ length         : 'difftime' num  6 5 5 5 ...
##   ..- attr(*, "units")= chr "days"
##  $ Difference     : num  0 35 21 22 31 24 25 38 33 37 ...
# Line chart of the number of days between consecutive headaches.
# The line colour is a fixed value, so it is set outside aes(); mapping the
# string "Red" inside aes() would produce the default hue instead of red.
L <- ggplot(Pain, aes(x = Date, y = Difference)) +
  geom_line(colour = "red", size = 2) +
  theme(
    panel.background = element_rect(fill = "black"),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  ) +
  ggtitle("Number of Days Between Headaches")

L