This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Summary statistics for each column of the built-in cars data set
print(summary(cars))
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Creating a data frame from two equal-length vectors (one per column)
years <- c(1980, 1980, 1985, 1990)
scores <- c(34, 44, 56, 83)
df <- data.frame(years, scores)
df
## years scores
## 1 1980 34
## 2 1980 44
## 3 1985 56
## 4 1990 83
# All rows of the first column (comes back as a plain vector)
df[, 1]
## [1] 1980 1980 1985 1990
# All columns of the first row (comes back as a one-row data frame)
df[1, ]
## years scores
## 1 1980 34
# Subsetting rows: keep only the rows whose score is below 50.
# The comma matters: df[cond, ] filters rows, whereas df[cond] uses the
# logical vector as a column selector and returns every row.
df[df$scores < 50, ]
## years scores
## 1 1980 34
## 2 1980 44
# Adding a new attribute (column); the single placeholder value ".." is
# recycled to every row
df$Age <- c("..")
df
## years scores Age
## 1 1980 34 ..
## 2 1980 44 ..
## 3 1985 56 ..
## 4 1990 83 ..
# Build a three-patient data frame from parallel vectors:
# each vector becomes a column, and position i describes patient i
subject_name <- c("John doe", "james doe", "Steve Graves")
Temperature <- c(98.1, 98.6, 101.4)
flu_status <- rep(FALSE, 3)
df <- data.frame(
  subject_name = subject_name,
  Temperature = Temperature,
  flu_status = flu_status
)
print(df)
## subject_name Temperature flu_status
## 1 John doe 98.1 FALSE
## 2 james doe 98.6 FALSE
## 3 Steve Graves 101.4 FALSE
# Toy medical data set: three parallel vectors, one entry per patient
subject_name <- c("John Doe", "James Doe", "Steve Graves")
Temperature <- c(98.1, 98.6, 101.4)
flu_status <- c(FALSE, FALSE, TRUE)
df <- data.frame(
  subject_name = subject_name,
  Temperature = Temperature,
  flu_status = flu_status
)
print(df)
## subject_name Temperature flu_status
## 1 John Doe 98.1 FALSE
## 2 James Doe 98.6 FALSE
## 3 Steve Graves 101.4 TRUE
# Blood type as a factor with an explicitly listed set of levels
blood <- factor(c("O", "AB", "A"), levels = c("A", "AB", "O"))
blood
## [1] O AB A
## Levels: A AB O
# Accessing patients by their position in a vector
# Second patient only
Temperature[2]
## [1] 98.6
# Second and third patients
Temperature[c(2, 3)]
## [1] 98.6 101.4
# A negative index drops that position: everyone except the second patient
Temperature[-2]
## [1] 98.1 101.4
# Everyone except the third patient
Temperature[-3]
## [1] 98.1 98.6
# A factor is a kind of vector that stores categorical or ordinal data;
# it is created with factor().
# Here a factor is built from a character vector.
gender <- factor(x = c("male", "Female", "male"))
gender
## [1] male Female male
## Levels: Female male
# Creating an ordinal factor.
# An ordinal factor is a factor whose levels have a natural ordering or
# hierarchy. The desired order is given through the `levels` argument, which
# must be passed INSIDE the factor()/ordered() call; written as a separate
# statement it only creates an unrelated variable and the levels fall back
# to alphabetical order.
# Every value must also match a level exactly (case-sensitive), otherwise
# it becomes NA.

# Plain factor with the levels listed in the desired order
education <- factor(
  c("highschool", "college", "graduate"),
  levels = c("highschool", "college", "graduate")
)
education
## [1] highschool college graduate
## Levels: highschool college graduate
# Ordered factor: same levels, now with highschool < college < graduate
education <- ordered(
  c("highschool", "college", "graduate"),
  levels = c("highschool", "college", "graduate")
)
education
## [1] highschool college graduate
## Levels: highschool < college < graduate
# Symptom severity as an ordered factor: mild < moderate < severe
symptoms <- ordered(
  c("severe", "mild", "moderate"),
  levels = c("mild", "moderate", "severe")
)
symptoms
## [1] severe mild moderate
## Levels: mild < moderate < severe
# Test whether each patient's symptoms are more severe than "moderate"
symptoms > "moderate"
## [1] TRUE FALSE FALSE
# Without a data frame, one patient's details have to be looked up one
# vector at a time, one after the other, like this:
subject_name[1L]
## [1] "John Doe"
Temperature[1L]
## [1] 98.1
flu_status[1L]
## [1] FALSE
# Indexing a factor keeps its full set of levels
gender[1L]
## [1] male
## Levels: Female male
blood[1L]
## [1] O
## Levels: A AB O
symptoms[1L]
## [1] severe
## Levels: mild < moderate < severe
# Collect all of the per-patient vectors into a single data frame.
# stringsAsFactors = FALSE keeps subject_name as character; the vectors
# that are already factors (gender, blood, symptoms) stay factors.
pt_data <- data.frame(
  subject_name = subject_name,
  Temperature = Temperature,
  flu_status = flu_status,
  gender = gender,
  blood = blood,
  symptoms = symptoms,
  stringsAsFactors = FALSE
)
pt_data
## subject_name Temperature flu_status gender blood symptoms
## 1 John Doe 98.1 FALSE male O severe
## 2 James Doe 98.6 FALSE Female AB mild
## 3 Steve Graves 101.4 TRUE male A moderate
# Accessing data in the data frame
# A single column by name
pt_data$subject_name
## [1] "John Doe" "James Doe" "Steve Graves"
# The value in the first row and second column
pt_data[1, 2]
## [1] 98.1
# Leaving the row or the column position blank selects every row or column.
# All rows of the first column:
pt_data[, 1]
## [1] "John Doe" "James Doe" "Steve Graves"
# First and third rows, second and fourth columns
pt_data[c(1, 3), c(2, 4)]
## Temperature gender
## 1 98.1 male
## 3 101.4 male
# Both positions blank: the whole data frame
pt_data[, ]
## subject_name Temperature flu_status gender blood symptoms
## 1 John Doe 98.1 FALSE male O severe
## 2 James Doe 98.6 FALSE Female AB mild
## 3 Steve Graves 101.4 TRUE male A moderate
# Columns are better accessed by name than by position
pt_data[c(1, 3), c("Temperature", "gender")]
## Temperature gender
## 1 98.1 male
## 3 101.4 male
# Creating a new column in a data frame:
# convert the Fahrenheit readings to Celsius, C = (F - 32) * (5 / 9)
pt_data$temp_c <- (pt_data$Temperature - 32) * (5 / 9)
pt_data
## subject_name Temperature flu_status gender blood symptoms temp_c
## 1 John Doe 98.1 FALSE male O severe 36.72222
## 2 James Doe 98.6 FALSE Female AB mild 37.00000
## 3 Steve Graves 101.4 TRUE male A moderate 38.55556
# Check the added column and confirm the calculation worked by comparing
# the new Celsius-based temp_c column with the original Fahrenheit-scale
# Temperature column:
pt_data[c("Temperature", "temp_c")]
## Temperature temp_c
## 1 98.1 36.72222
## 2 98.6 37.00000
## 3 101.4 38.55556
# VISUALIZING DATA.PPTX (DATA EXPLORATION) GGPLOT2
# Graphics: use plot() to plot data; plot(x, y) produces a scatterplot
# of the numbers in x versus the numbers in y.
# Many options can be passed to plot(): the xlab argument puts a label on
# the x-axis, ylab puts a label on the y-axis, and main sets the plot title.
# To find out more about the plot() function, type ?plot
# Labels help viewers understand what the plotted values on the x and y
# axes are.
?plot
## starting httpd help server ... done
# Generate random data: 100 draws each from rnorm() for x and y
x <- rnorm(100)
y <- rnorm(100)
# Bare scatterplot with default labels
plot(x, y)
# Same scatterplot with axis labels and a title. `main` has to be an
# argument of the plot() call; as a separate statement it would only
# create a variable called `main` and no title would be drawn.
plot(
  x, y,
  xlab = " this is the x-axis",
  ylab = "this is the y-axix",
  main = "plot of X vs Y"
)
# We learn how to visualize data using ggplot2.
# R has several packages for making graphs; ggplot2 seems to be the best.
# ggplot2 implements the grammar of graphics.
# The mpg data frame is a data set included in the ggplot2 package
# that contains fuel economy data for various car models.