### Welcome to Lecture IV (Chapter II)

I would like you to start this handout by defining the following terms: hosts, vulnerability, data frame, list, array, matrix, categorical data, operating systems, nodes, zones, and IP.

# Create a new data frame of hosts & high vuln counts: one row per host, with
# its name, operating system and number of high vulnerabilities.
# stringsAsFactors = FALSE keeps name/os as character vectors on every R
# version (it only became the default in R 4.0), matching the str() output
# shown below.
assets.df <- data.frame(
  name = c("danube", "gander", "ganges", "mekong", "orinoco"),
  os = c("W2K8", "RHEL5", "W2K8", "RHEL5", "RHEL5"),
  highvulns = c(1, 0, 2, 0, 0),
  stringsAsFactors = FALSE
)
# Let us now take a look at the data frame assets.df.
# View() opens a data viewer window, so it is only called in an interactive
# session; this keeps the script runnable with Rscript or when knitted.
if (interactive()) {
  View(assets.df)
}
# Describe our data frame in a few words.
# Take a look at the data frame structure & contents.
str(assets.df)
## 'data.frame':    5 obs. of  3 variables:
##  $ name     : chr  "danube" "gander" "ganges" "mekong" ...
##  $ os       : chr  "W2K8" "RHEL5" "W2K8" "RHEL5" ...
##  $ highvulns: num  1 0 2 0 0
head(assets.df)
# show a "slice" of just the operating systems

Explain the output, as well as the syntax, of the R code used in the activity above.

# Before R 4.0, data.frame() stored strings as "factors" (categorical data)
# by default; newer versions keep them as character vectors. Wrapping the
# column in as.character() returns plain strings in either case.
head(as.character(assets.df$os))
## [1] "W2K8"  "RHEL5" "W2K8"  "RHEL5" "RHEL5"
# Explain the output.
# Add a new column holding one IP address per host.
assets.df$ip <- c("192.168.1.5", "10.2.7.5", "192.168.1.7",
                  "10.2.7.6", "10.2.7.7")

Describe what we have just accomplished in the chunk above. In addition, explain the syntax used to complete this task.

# Extract only the nodes that have more than one high vulnerability.
head(assets.df[assets.df$highvulns > 1, ])

Explain the syntax as well as the output of the task executed above.

# Create a 'zones' column from the IP prefix: addresses that start with 192
# are labelled "Zone1", every other address "Zone2".
assets.df$zones <- ifelse(
  test = grepl(pattern = "^192", x = assets.df$ip),
  yes = "Zone1",
  no = "Zone2"
)

Explain the syntax as well as the output of the chunk run above.

# Show the first rows of the data frame.
head(x = assets.df)

Take a final look at the data frame and explain your findings from this activity.

library(ggplot2)
# Convert the high-vulnerability counts to a factor once; the third chart uses
# them as discrete categories. Repeating the conversion before every plot is
# unnecessary because as.factor() leaves an existing factor unchanged.
# NOTE: from here on highvulns is no longer numeric, so numeric comparisons
# such as assets.df$highvulns > 1 must be run before this line.
assets.df$highvulns <- as.factor(assets.df$highvulns)

# Chart 1: number of hosts (IPs) in each zone.
p <- ggplot(data = assets.df) +
  geom_bar(aes(x = zones, fill = zones)) +
  labs(title = "IP counts by Zone")
p

# Chart 2: number of hosts per operating system, with bars filled by zone.
# The title names both variables because the x-axis is the OS, not the zone.
p <- ggplot(data = assets.df) +
  geom_bar(aes(x = os, fill = zones)) +
  labs(title = "IP counts by OS and Zone")
p

# Chart 3: how many hosts have each high-vulnerability count, drawn as one
# panel per zone.
p <- ggplot(data = assets.df) +
  geom_bar(aes(x = highvulns, fill = highvulns)) +
  facet_grid(~zones) +
  labs(title = "High vulnerability counts by Zone")
p