Question
Syntax: show how to read data files of different types into R.
Answer
1. Reading data from Excel
# 1. XLSX
# Import the workbook as a tibble (read_excel() is provided by the readxl
# package, which must already be attached) and preview the first six rows.
AirQualityUCI <- read_excel(
  path = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/AirQualityUCI.xlsx"
)
head(AirQualityUCI, n = 6L)
## # A tibble: 6 x 15
## Date Time `CO(GT)` `PT08.S1(CO)` `NMHC(GT)`
## <dttm> <dttm> <dbl> <dbl> <dbl>
## 1 2004-03-10 00:00:00 1899-12-31 18:00:00 2.6 1360 150
## 2 2004-03-10 00:00:00 1899-12-31 19:00:00 2 1292. 112
## 3 2004-03-10 00:00:00 1899-12-31 20:00:00 2.2 1402 88
## 4 2004-03-10 00:00:00 1899-12-31 21:00:00 2.2 1376. 80
## 5 2004-03-10 00:00:00 1899-12-31 22:00:00 1.6 1272. 51
## 6 2004-03-10 00:00:00 1899-12-31 23:00:00 1.2 1197 38
## # ... with 10 more variables: C6H6(GT) <dbl>, PT08.S2(NMHC) <dbl>,
## # NOx(GT) <dbl>, PT08.S3(NOx) <dbl>, NO2(GT) <dbl>, PT08.S4(NO2) <dbl>,
## # PT08.S5(O3) <dbl>, T <dbl>, RH <dbl>, AH <dbl>
# visualization
# Build an hourly time series of the PT08.S2(NMHC) sensor column and plot it.
#
# ts() has no argument for a vector of dates: its second argument is `start`,
# so a Date sequence passed there is silently coerced to numbers and yields a
# meaningless time origin. The index is described with `start`/`frequency`
# instead:
#   * frequency = 24  -> one cycle per day, because readings are hourly;
#   * start = c(1, 19) -> day 1 (2004-03-10), 19th hourly slot, because the
#     first reading in the preview is stamped 18:00.
# NOTE(review): assumes one reading per hour with no gaps - confirm.
AirQualityUCI.ts <- ts(
  as.numeric(AirQualityUCI$`PT08.S2(NMHC)`),
  start = c(1, 19),
  frequency = 24
)
# Label the axis with the variable that is actually plotted.
plot(
  AirQualityUCI.ts,
  type = "o",
  xlab = "Day (1 = 2004-03-10)",
  ylab = "PT08.S2(NMHC)"
)
# 2. XLS
# read_excel() handles the legacy .xls format as well; the call is the same
# as for .xlsx. Preview the first six rows afterwards.
ChildImmunizationDataset <- read_excel(
  path = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/ChildImmunizationDataset.xls"
)
head(ChildImmunizationDataset, n = 6L)
## # A tibble: 6 x 8
## MONTHS M_FI_9_11 F_FI_9_11 T_FI_9_11 AEFI_O IS_H AEFI_D IS_ASHA
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 April 14-15 823505 740673 1564178 13787 855344 5 649796
## 2 May 14-15 933746 838778 1772524 14753 886715 7 677537
## 3 June 14-15 1049824 949299 1999123 15243 930836 7 713536
## 4 July 14-15 1080507 981732 2062239 16291 953204 6 730461
## 5 August 14-15 1042367 947543 1989910 16260 920100 8 705477
## 6 September 14-15 1033945 944757 1978702 16863 918137 9 709358
# visualization
# Monthly time series of the M_FI_9_11 column.
#
# ts() takes `start` and `frequency`, not a vector of dates; a Date sequence
# in the second position is coerced to numbers and produces a bogus origin.
# The data are monthly (frequency = 12) and the first row is labelled
# "April 14-15".
# NOTE(review): "April 14-15" is read here as April 2014 (fiscal year
# 2014-15), hence start = c(2014, 4) - confirm against the data source.
ChildImmunizationDataset.ts <- ts(
  as.numeric(ChildImmunizationDataset$M_FI_9_11),
  start = c(2014, 4),
  frequency = 12
)
plot(ChildImmunizationDataset.ts, type = "o", ylab = "M_FI_9_11")
2. Reading data from CSV
# Comma-separated file whose first line holds the column names
# (header = TRUE); preview the first six rows after loading.
DailyDelhiClimateTest <- read.csv(
  file = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/DailyDelhiClimateTest.csv",
  header = TRUE
)
head(DailyDelhiClimateTest, n = 6L)
## date meantemp humidity wind_speed meanpressure
## 1 2017-01-01 15.91304 85.86957 2.743478 59.000
## 2 2017-01-02 18.50000 77.22222 2.894444 1018.278
## 3 2017-01-03 17.11111 81.88889 4.016667 1018.333
## 4 2017-01-04 18.70000 70.05000 4.545000 1015.700
## 5 2017-01-05 18.38889 74.94444 3.300000 1014.333
## 6 2017-01-06 19.31818 79.31818 8.681818 1011.773
# visualization
# Daily humidity as a time series.
#
# ts() takes `start` and `frequency`, not a vector of dates; a Date sequence
# in the second position is coerced to numbers and produces a bogus origin.
# The observations are daily and begin on 2017-01-01, so the series starts at
# day 1 of 2017 with 365 observations per year (time axis in decimal years).
DailyDelhiClimateTest.ts <- ts(
  as.numeric(DailyDelhiClimateTest$humidity),
  start = c(2017, 1),
  frequency = 365
)
plot(DailyDelhiClimateTest.ts, type = "o", ylab = "humidity")
3. Reading data from TXT
# way 1
# read.csv() with a custom separator: the text file is semicolon-delimited.
# With header = FALSE the file's own title line is kept as the first data
# row and the columns receive the default names V1..V9.
household_power_consumption <- read.csv(
  file = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/household_power_consumption.txt",
  header = FALSE,
  sep = ";"
)
head(household_power_consumption, n = 6L)
## V1 V2 V3 V4 V5
## 1 Date Time Global_active_power Global_reactive_power Voltage
## 2 16/12/2006 17:24:00 4.216 0.418 234.840
## 3 16/12/2006 17:25:00 5.360 0.436 233.630
## 4 16/12/2006 17:26:00 5.374 0.498 233.290
## 5 16/12/2006 17:27:00 5.388 0.502 233.740
## 6 16/12/2006 17:28:00 3.666 0.528 235.680
## V6 V7 V8 V9
## 1 Global_intensity Sub_metering_1 Sub_metering_2 Sub_metering_3
## 2 18.400 0.000 1.000 17.000
## 3 23.000 0.000 1.000 16.000
## 4 23.000 0.000 2.000 17.000
## 5 23.000 0.000 1.000 17.000
## 6 15.800 0.000 1.000 17.000
# way 2
# read.delim() is the same reader with different defaults; once sep = ";"
# and header = FALSE are supplied it returns the same table as way 1
# (title line kept as the first data row, columns named V1..V9).
household_power_consumption <- read.delim(
  file = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/household_power_consumption.txt",
  header = FALSE,
  sep = ";"
)
head(household_power_consumption, n = 6L)
## V1 V2 V3 V4 V5
## 1 Date Time Global_active_power Global_reactive_power Voltage
## 2 16/12/2006 17:24:00 4.216 0.418 234.840
## 3 16/12/2006 17:25:00 5.360 0.436 233.630
## 4 16/12/2006 17:26:00 5.374 0.498 233.290
## 5 16/12/2006 17:27:00 5.388 0.502 233.740
## 6 16/12/2006 17:28:00 3.666 0.528 235.680
## V6 V7 V8 V9
## 1 Global_intensity Sub_metering_1 Sub_metering_2 Sub_metering_3
## 2 18.400 0.000 1.000 17.000
## 3 23.000 0.000 1.000 16.000
## 4 23.000 0.000 2.000 17.000
## 5 23.000 0.000 1.000 17.000
## 6 15.800 0.000 1.000 17.000
# visualization
# Per-minute Voltage (column V5) as a time series.
#
# The table was read with header = FALSE, so V5 is text and its first element
# is the column title "Voltage". Plotting it unchanged forces a coercion at
# plot time and raises "NAs introduced by coercion". Clean the column first.
voltage_chr <- as.character(household_power_consumption$V5)
if (length(voltage_chr) > 0L && identical(voltage_chr[1L], "Voltage")) {
  voltage_chr <- voltage_chr[-1L]
}
# NOTE(review): the source file presumably marks missing readings with "?";
# map those (and empty strings) to NA explicitly - confirm against the file.
voltage_chr[voltage_chr %in% c("?", "")] <- NA

# ts() takes `start` and `frequency`, not a vector of dates. Readings are one
# per minute (1440 per day) and the first one is stamped 17:24, i.e. the
# 1045th minute slot of day 1 (2006-12-16).
# NOTE(review): assumes one row per minute with no gaps - confirm.
household.ts <- ts(
  as.numeric(voltage_chr),
  start = c(1, 1045),
  frequency = 1440
)
# Label the axis with the variable that is actually plotted.
plot(
  household.ts,
  type = "o",
  xlab = "Day (1 = 2006-12-16)",
  ylab = "Voltage"
)
## Warning in xy.coords(x, NULL, log = log, setLab = FALSE): NAs introduced by
## coercion
## Warning in xy.coords(x, y): NAs introduced by coercion
4. Reading data from SPSS
# read_sav() (haven package, must already be attached) imports an SPSS .sav
# file as a tibble. Note that this reuses the name AirQualityUCI and therefore
# replaces the object loaded from the XLSX file earlier.
AirQualityUCI <- read_sav(
  file = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/AirQualityUCI.sav"
)
head(AirQualityUCI, n = 6L)
## # A tibble: 6 x 15
## Date Time CO_GT PT08.S1_CO NMHC_GT C6H6_GT PT08.S2_NMHC NOx_GT PT08.S3_NOx
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 10/03/2004 18.00.00 2.6 1360 150 11.9 1046 166 1056
## 2 10/03/2004 19.00.00 2 1292 112 9.4 955 103 1174
## 3 10/03/2004 20.00.00 2.2 1402 88 9 939 131 1140
## 4 10/03/2004 21.00.00 2.2 1376 80 9.2 948 172 1092
## 5 10/03/2004 22.00.00 1.6 1272 51 6.5 836 131 1205
## 6 10/03/2004 23.00.00 1.2 1197 38 4.7 750 89 1337
## # ... with 6 more variables: NO2_GT <dbl>, PT08.S4_NO2 <dbl>, PT08.S5_O3 <dbl>,
## # T <dbl>, RH <dbl>, AH <dbl>
5. Reading data from STATA
# read_dta() (haven package, must already be attached) imports a Stata .dta
# file as a tibble.
# NOTE(review): the preview shows generic column names A..Q with the real
# titles ("Date", "Time", ...) stored as the first data row, so most columns
# come back as character - the .dta file likely needs to be re-exported.
airqualitystata <- read_dta(
  file = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/airqualitystata.dta"
)
head(airqualitystata, n = 6L)
## # A tibble: 6 x 17
## A B C D E F G H I J K L M
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 "Date" Time CO_GT PT08~ NMHC~ C6H6~ PT08~ NO_x~ PT08~ NO2_~ PT08~ PT08~ T
## 2 " 3/1~ 18:0~ 2.6 1360 150 11.8~ 1045~ 166 1056~ 113 1692 1267~ 13.5~
## 3 " 3/1~ 19:0~ 2 1292~ 112 9.39~ 954.~ 103 1173~ 92 1558~ 972.~ 13.2~
## 4 " 3/1~ 20:0~ 2.2 1402 88 8.99~ 939.~ 131 1140 114 1554~ 1074 11.9~
## 5 " 3/1~ 21:0~ 2.2 1375~ 80 9.22~ 948.~ 172 1092 122 1583~ 1203~ 11
## 6 " 3/1~ 22:0~ 1.6 1272~ 51 6.51~ 835.5 131 1205 116 1490 1110 11.1~
## # ... with 4 more variables: N <chr>, O <chr>, P <dbl>, Q <dbl>
6. Reading data from JSON
# Parse the JSON metadata file into a nested list and flatten it straight
# into a data frame, then print the result.
# NOTE(review): fromJSON(file = ...) is presumably rjson's version (it is the
# one with a `file` argument) - confirm which package is attached.
bachelors_degree <- as.data.frame(
  fromJSON(
    file = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/bachelors-degree-or-higher-5-year-estimate-in-los-angeles-county-ca_metadata.json"
  )
)
print(bachelors_degree)
## realtime_start realtime_end seriess.id seriess.realtime_start
## 1 2019-12-06 2019-12-06 HC01ESTVC1706037 2019-12-06
## seriess.realtime_end
## 1 2019-12-06
## seriess.title
## 1 Bachelor's Degree or Higher (5-year estimate) in Los Angeles County, CA
## seriess.observation_start seriess.observation_end seriess.frequency
## 1 2010-01-01 2017-01-01 Annual
## seriess.frequency_short seriess.units seriess.units_short
## 1 A Percent %
## seriess.seasonal_adjustment seriess.seasonal_adjustment_short
## 1 Not Seasonally Adjusted NSA
## seriess.last_updated seriess.popularity
## 1 2019-02-27 13:42:02-06 11
## seriess.notes
## 1 Estimate of educational attainment for population 18 years old and over whose highest degree was a bachelor’s, master’s, or professional or doctorate degree. (ACS variable S1501_C02_015E from table S1501.)\n\nFor more information about the subject definitions, see: https://www.census.gov/programs-surveys/acs/technical-documentation/code-lists.html.\n\nMultiyear estimates from the American Community Survey (ACS) are "period" estimates derived from a data sample collected over a period of time, as opposed to "point-in-time" estimates such as those from past decennial censuses. ACS 5-year estimates include data collected over a 60-month period. The date associated with the data is the end of the 5-year period. For example, a value dated 2014 represents data from 2010 to 2014. However, the value does not describe any specific day, month, or year within that time period.\n\nMultiyear estimates require some additional considerations. For example, multiyear estimates released in consecutive years consist mostly of overlapping years and shared data. The 2010-2014 ACS 5-year estimates share sample data from 2011 through 2014 with the 2011-2015 ACS 5-year estimates. Because of this overlap, users should use extreme caution in making comparisons with consecutive years of multiyear estimates.\n\nPlease see the ACS handbook (Section 3, "Understanding and Using ACS Single-Year and Multiyear Estimates," p. 13) for a comprehensive set of details and clarifications: https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018.pdf
# Compact overview of the flattened JSON: one line per column with its type.
str(object = bachelors_degree)
## 'data.frame': 1 obs. of 17 variables:
## $ realtime_start : chr "2019-12-06"
## $ realtime_end : chr "2019-12-06"
## $ seriess.id : chr "HC01ESTVC1706037"
## $ seriess.realtime_start : chr "2019-12-06"
## $ seriess.realtime_end : chr "2019-12-06"
## $ seriess.title : chr "Bachelor's Degree or Higher (5-year estimate) in Los Angeles County, CA"
## $ seriess.observation_start : chr "2010-01-01"
## $ seriess.observation_end : chr "2017-01-01"
## $ seriess.frequency : chr "Annual"
## $ seriess.frequency_short : chr "A"
## $ seriess.units : chr "Percent"
## $ seriess.units_short : chr "%"
## $ seriess.seasonal_adjustment : chr "Not Seasonally Adjusted"
## $ seriess.seasonal_adjustment_short: chr "NSA"
## $ seriess.last_updated : chr "2019-02-27 13:42:02-06"
## $ seriess.popularity : num 11
## $ seriess.notes : chr "Estimate of educational attainment for population 18 years old and over whose highest degree was a bachelor’s, "| __truncated__
7. Reading data from Markdown (MD)
# A Markdown file is plain text: readLines() returns it as a character
# vector with one element per line. Preview the first six lines.
mydata <- readLines(
  con = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/README.md"
)
head(mydata, n = 6L)
## [1] "# COVID-19-Community-Mobility-Dataset"
## [2] "The Dataset has been extracted from google https://www.google.com/covid19/mobility/ for 120+ countries and have been tried to look into deep inside the mobility data. This data may help to understand community mobility inpacts on infection rate both countrywise as well as worldwide."
## [3] "File:"
## [4] "~~~~~"
## [5] "community_dataset_with_infection_count.csv"
## [6] "____________________________________________"