R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document is generated that includes both the content and the output of any embedded R code chunks. You can embed an R code chunk like this:

# Print summary statistics (min, quartiles, median, mean, max) for each column
# of the cars dataset; the captured output below shows its speed and dist columns.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Creating a data frame from two equal-length vectors (years and scores)
years <- c(1980, 1980, 1985, 1990)
scores <- c(34, 44, 56, 83)
df <- data.frame(years, scores)
df
##   years scores
## 1  1980     34
## 2  1980     44
## 3  1985     56
## 4  1990     83
# All rows of the first column (returned as a plain vector)
df[, 1]
## [1] 1980 1980 1985 1990
# All columns of the first row (returned as a one-row data frame)
df[1, ]
##   years scores
## 1  1980     34
# Subsetting: keep only the rows whose score is below 50.
# The comma matters: df[mask, ] filters rows, whereas df[mask] (no comma)
# treats the mask as a column selector and silently returns every row.
low_scores <- df[df$scores < 50, ]
low_scores
##   years scores
## 1  1980     34
## 2  1980     44
# Adding a new attribute (column); the single ".." placeholder value is
# recycled to every row.
df$Age <- c("..")
df
##   years scores Age
## 1  1980     34  ..
## 2  1980     44  ..
## 3  1985     56  ..
## 4  1990     83  ..
# Creating a data frame of patients: one vector per attribute, then combine
# them column-wise. At this stage nobody is flagged as having the flu.
flu_status <- rep(FALSE, 3)
Temperature <- c(98.1, 98.6, 101.4)
subject_name <- c("John doe", "james doe", "Steve Graves")

df <- data.frame(
  subject_name = subject_name,
  Temperature = Temperature,
  flu_status = flu_status
)
print(df)
##   subject_name Temperature flu_status
## 1     John doe        98.1      FALSE
## 2    james doe        98.6      FALSE
## 3 Steve Graves       101.4      FALSE
# Toy medical dataset: three patients with a temperature reading and a flu flag
subject_name <- c("John Doe", "James Doe", "Steve Graves")
Temperature <- c(98.1, 98.6, 101.4)
flu_status <- c(FALSE, FALSE, TRUE)
df <- data.frame(
  subject_name = subject_name,
  Temperature = Temperature,
  flu_status = flu_status
)
print(df)
##   subject_name Temperature flu_status
## 1     John Doe        98.1      FALSE
## 2    James Doe        98.6      FALSE
## 3 Steve Graves       101.4       TRUE
# Blood type as a factor with its levels listed explicitly
blood <- factor(
  c("O", "AB", "A"),
  levels = c("A", "AB", "O")
)
blood
## [1] O  AB A 
## Levels: A AB O
# Accessing patients' values by their position in the vector
# Second element only
Temperature[2]
## [1] 98.6
# Second through third elements
Temperature[2:3]
## [1]  98.6 101.4
# A negative index drops that position: everything except the second element
Temperature[-2]
## [1]  98.1 101.4
# Everything except the third element
Temperature[-3]
## [1] 98.1 98.6
# A factor is a kind of vector that stores categorical or ordinal data.
# It is created with factor().

# Creating a factor from a character vector.
# With no `levels` argument, the levels are the sorted unique values.
gender <- factor(c("male", "Female", "male"))
gender
## [1] male   Female male  
## Levels: Female male
# Creating an ordinal factor.
# An ordinal factor is a factor whose levels have a natural ordering or
# hierarchy. The desired order must be passed through the `levels` argument
# INSIDE the call; a separate `levels = ...` statement only creates an unrelated
# variable and leaves the levels in alphabetical order.
education <- factor(
  c("highschool", "college", "graduate"),
  levels = c("highschool", "college", "graduate")
)
education
## [1] highschool college    graduate  
## Levels: highschool college graduate
# ordered() additionally marks the levels as ranked, enabling < and > comparisons.
education <- ordered(
  c("highschool", "college", "graduate"),
  levels = c("highschool", "college", "graduate")
)
education
## [1] highschool college    graduate  
## Levels: highschool < college < graduate
# Equivalent spelling: factor(..., ordered = TRUE)
symptoms <- factor(
  c("severe", "mild", "moderate"),
  levels = c("mild", "moderate", "severe"),
  ordered = TRUE
)
symptoms
## [1] severe   mild     moderate
## Levels: mild < moderate < severe
# Test whether each patient's symptoms are more severe than "moderate":
symptoms > "moderate"
## [1]  TRUE FALSE FALSE
# Each attribute of the first patient can be looked up one vector at a time,
# as shown below; the data frame created afterwards keeps them together instead.
# First patient's name
subject_name[1]
## [1] "John Doe"
# First patient's temperature
Temperature[1]
## [1] 98.1
# First patient's flu status
flu_status[1]
## [1] FALSE
# Indexing a factor keeps its full set of levels in the printed result
gender[1]
## [1] male
## Levels: Female male
blood[1]
## [1] O
## Levels: A AB O
symptoms[1]
## [1] severe
## Levels: mild < moderate < severe
# Creating a data frame that gathers all of the patient information
pt_data <- data.frame(
  subject_name = subject_name,
  Temperature = Temperature,
  flu_status = flu_status,
  gender = gender,
  blood = blood,
  symptoms = symptoms,
  stringsAsFactors = FALSE
)
pt_data
##   subject_name Temperature flu_status gender blood symptoms
## 1     John Doe        98.1      FALSE   male     O   severe
## 2    James Doe        98.6      FALSE Female    AB     mild
## 3 Steve Graves       101.4       TRUE   male     A moderate
# Accessing data in the data frame: `$` extracts one column as a vector
pt_data$subject_name
## [1] "John Doe"     "James Doe"    "Steve Graves"
# Extract the value in the first row and second column of the patient data frame
pt_data[1, 2]
## [1] 98.1
# To refer to every row or every column, leave the row or column portion blank.
# Extract all rows of the first column:
pt_data[, 1]
## [1] "John Doe"     "James Doe"    "Steve Graves"
# Pull the first and third rows of the second and fourth columns
pt_data[c(1, 3), c(2, 4)]
##   Temperature gender
## 1        98.1   male
## 3       101.4   male
# Leaving both portions blank extracts everything
pt_data[, ]
##   subject_name Temperature flu_status gender blood symptoms
## 1     John Doe        98.1      FALSE   male     O   severe
## 2    James Doe        98.6      FALSE Female    AB     mild
## 3 Steve Graves       101.4       TRUE   male     A moderate
# Note: columns are better accessed by name than by position
pt_data[c(1, 3), c("Temperature", "gender")]
##   Temperature gender
## 1        98.1   male
## 3       101.4   male
# Creating a new column in a data frame: convert Fahrenheit to Celsius
pt_data[["temp_c"]] <- (pt_data[["Temperature"]] - 32) * (5 / 9)
pt_data
##   subject_name Temperature flu_status gender blood symptoms   temp_c
## 1     John Doe        98.1      FALSE   male     O   severe 36.72222
## 2    James Doe        98.6      FALSE Female    AB     mild 37.00000
## 3 Steve Graves       101.4       TRUE   male     A moderate 38.55556
# Check the added column and confirm the calculation worked by comparing the
# new Celsius-based temp_c column with the original Fahrenheit Temperature column:
pt_data[, c("Temperature", "temp_c")]
##   Temperature   temp_c
## 1        98.1 36.72222
## 2        98.6 37.00000
## 3       101.4 38.55556
# VISUALIZING DATA.PPTX (DATA EXPLORATION), GGPLOT2

# Graphics: to plot data use the function plot(); plot(x, y) produces a
# scatterplot of the numbers in x versus the numbers in y.
# Many options can be passed to plot(): the argument xlab sets the label on the
# x-axis, ylab sets the label on the y-axis, and main sets the plot title.
# Labels help viewers understand what the plotted values on the x and y axes are.
# To find out more about the plot() function, type ?plot
?plot
## starting httpd help server ... done
# Generate random data. The seed is fixed so the figures are reproducible
# every time the document is knitted.
set.seed(42)
x <- rnorm(100)
y <- rnorm(100)
plot(x, y)

# The title must be passed as the `main` argument inside the call; written as a
# separate statement it only creates a variable and the plot stays untitled.
plot(
  x, y,
  xlab = " this is the x-axis",
  ylab = "this is the y-axis",
  main = "plot of X vs Y"
)

# Next, we learn how to visualize data using ggplot2.
# R has several packages for making graphs; ggplot2 seems to be the best.
# ggplot2 implements the grammar of graphics.


# The mpg data frame is a dataset included in the ggplot2 package
# that contains fuel economy data for various car models.