library(dplyr)
library(ggplot2)
library(knitr)
library(tidyr)
Import dataset2
# Load the logon records (logon_info.csv, about 55.8 MB on disk).
# as.is = TRUE keeps the text columns as character instead of factor.
logon <- read.table(
  "dataset2/logon_info.csv",
  header = TRUE,
  sep = ",",
  as.is = TRUE
)
# Parse "month/day/year hour:minute:second" timestamps. strptime() returns
# POSIXlt, a list-based class that behaves poorly as a data-frame column
# (and with dplyr verbs), so convert to the compact POSIXct representation.
# No tz is supplied, so values are still read in the session's time zone.
logon$date <- as.POSIXct(strptime(logon$date, "%m/%d/%Y %T"))
# activity is categorical (Logon / Logoff), so store it as a factor.
logon$activity <- as.factor(logon$activity)
logon_info.csv (~855,000 records)
- Fields: id, date, user, pc, activity (Logon/Logoff)
- Some logons have been removed from the dataset to mimic a “messy” scenario
- Each user has an assigned machine, but can share others
- 100 machines are physically shared by some of the users, in addition to their assigned PCs.
- Some logons occur after hours. Note: screen unlocks are recorded as logons; screen locks are not recorded.
# Preview the first six logon records
head(logon, n = 6L)
## id date user pc activity
## 1 {X1D9-S0ES98JV-5357PWMI} 2010-01-02 06:49:00 NGF0157 PC-6056 Logon
## 2 {G2B3-L6EJ61GT-2222RKSO} 2010-01-02 06:50:00 LRR0148 PC-4275 Logon
## 3 {U6Q3-U0WE70UA-3770UREL} 2010-01-02 06:53:04 LRR0148 PC-4124 Logon
## 4 {I0N5-R7NA26TG-6263KNGM} 2010-01-02 07:00:00 IRM0931 PC-7188 Logon
## 5 {D1S0-N6FH62BT-5398KANK} 2010-01-02 07:00:00 MOH0273 PC-6699 Logon
## 6 {S6P1-M4MK04BB-0722IITW} 2010-01-02 07:07:00 LAP0338 PC-5758 Logon
# Number of rows (records) and columns (fields) in the logon table
c(nrow(logon), ncol(logon))
## [1] 854859 5
# Compact overview of each column's type and leading values
utils::str(object = logon)
## 'data.frame': 854859 obs. of 5 variables:
## $ id : chr "{X1D9-S0ES98JV-5357PWMI}" "{G2B3-L6EJ61GT-2222RKSO}" "{U6Q3-U0WE70UA-3770UREL}" "{I0N5-R7NA26TG-6263KNGM}" ...
## $ date : POSIXlt, format: "2010-01-02 06:49:00" "2010-01-02 06:50:00" ...
## $ user : chr "NGF0157" "LRR0148" "LRR0148" "IRM0931" ...
## $ pc : chr "PC-6056" "PC-4275" "PC-4124" "PC-7188" ...
## $ activity: Factor w/ 2 levels "Logoff","Logon": 2 2 2 2 2 2 2 2 2 2 ...
# Check for missing values: is.na() flags every cell of the data frame and
# table() tallies the TRUE/FALSE flags, so a result containing only a FALSE
# count means no cell is NA.
table(is.na(logon))
##
## FALSE
## 4274295
# Per-column summary: date range/quartiles and Logon/Logoff counts
summary(object = logon)
## id date user
## Length:854859 Min. :2010-01-02 06:49:00 Length:854859
## Class :character 1st Qu.:2010-04-27 12:06:42 Class :character
## Mode :character Median :2010-08-24 09:24:00 Mode :character
## Mean :2010-08-30 01:03:22
## 3rd Qu.:2011-01-03 07:41:00
## Max. :2011-05-17 06:43:35
## pc activity
## Length:854859 Logoff:384268
## Class :character Logon :470591
## Mode :character
##
##
##