Responsi 1 ADW

Khusnia Nurul Khikmah (G1501211049)

1/26/2022

Question

Syntax: show how to read data from different file types into R!

Answer

1. Reading data from Excel

# 1. XLSX
# Load the air-quality workbook into a tibble, then preview its first rows.
AirQualityUCI <- read_excel(
  path = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/AirQualityUCI.xlsx"
)
head(AirQualityUCI, n = 6L)

## # A tibble: 6 x 15
##   Date                Time                `CO(GT)` `PT08.S1(CO)` `NMHC(GT)`
##   <dttm>              <dttm>                 <dbl>         <dbl>      <dbl>
## 1 2004-03-10 00:00:00 1899-12-31 18:00:00      2.6         1360         150
## 2 2004-03-10 00:00:00 1899-12-31 19:00:00      2           1292.        112
## 3 2004-03-10 00:00:00 1899-12-31 20:00:00      2.2         1402          88
## 4 2004-03-10 00:00:00 1899-12-31 21:00:00      2.2         1376.         80
## 5 2004-03-10 00:00:00 1899-12-31 22:00:00      1.6         1272.         51
## 6 2004-03-10 00:00:00 1899-12-31 23:00:00      1.2         1197          38
## # ... with 10 more variables: C6H6(GT) <dbl>, PT08.S2(NMHC) <dbl>,
## #   NOx(GT) <dbl>, PT08.S3(NOx) <dbl>, NO2(GT) <dbl>, PT08.S4(NO2) <dbl>,
## #   PT08.S5(O3) <dbl>, T <dbl>, RH <dbl>, AH <dbl>

# Visualization
# Plot the hourly PT08.S2(NMHC) readings as a time series.
#
# ts() expects `start` to be a single number or a c(period, position) pair.
# Passing a sequence of Dates does not attach dates to the observations: the
# Dates are coerced to day counts and only the first two elements are used,
# which yields a meaningless time axis.
#
# The first record is 2004-03-10 18:00 and the readings are hourly, so with
# 24 observations per day the series starts at day 1, position 19 (hours
# 00-23 map to positions 1-24). The x-axis is in days counted from 2004-03-10.
AirQualityUCI.ts <- ts(
  AirQualityUCI$`PT08.S2(NMHC)`,
  start = c(1, 19),
  frequency = 24
)
# Label the y-axis with the column that is actually plotted.
plot(
  AirQualityUCI.ts,
  type = "o",
  xlab = "Day (1 = 2004-03-10)",
  ylab = "PT08.S2(NMHC)"
)

# 2. XLS
# Load the .xls child-immunization workbook and preview its first rows.
ChildImmunizationDataset <- read_excel(
  path = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/ChildImmunizationDataset.xls"
)
head(ChildImmunizationDataset, n = 6L)

## # A tibble: 6 x 8
##   MONTHS          M_FI_9_11 F_FI_9_11 T_FI_9_11 AEFI_O   IS_H AEFI_D IS_ASHA
##   <chr>               <dbl>     <dbl>     <dbl>  <dbl>  <dbl>  <dbl>   <dbl>
## 1 April 14-15        823505    740673   1564178  13787 855344      5  649796
## 2 May 14-15          933746    838778   1772524  14753 886715      7  677537
## 3 June 14-15        1049824    949299   1999123  15243 930836      7  713536
## 4 July 14-15        1080507    981732   2062239  16291 953204      6  730461
## 5 August 14-15      1042367    947543   1989910  16260 920100      8  705477
## 6 September 14-15   1033945    944757   1978702  16863 918137      9  709358

# Visualization
# Plot the monthly M_FI_9_11 counts as a time series.
#
# ts() expects `start` to be a single number or a c(year, position) pair.
# Passing a sequence of Dates does not attach dates to the observations: the
# Dates are coerced to day counts and only the first two elements are used,
# which yields a meaningless time axis.
#
# The rows are monthly and begin with "April 14-15".
# NOTE(review): presumably this is April 2014 of fiscal year 2014-15 --
# confirm against the data source.
ChildImmunizationDataset.ts <- ts(
  ChildImmunizationDataset$M_FI_9_11,
  start = c(2014, 4),
  frequency = 12
)
plot(ChildImmunizationDataset.ts, type = "o", ylab = "M_FI_9_11")

2. Reading data from CSV

# Read the comma-separated climate file (its first line supplies the column
# names) and preview the first rows.
DailyDelhiClimateTest <- read.csv(
  file = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/DailyDelhiClimateTest.csv",
  header = TRUE
)
head(DailyDelhiClimateTest, n = 6L)

##         date meantemp humidity wind_speed meanpressure
## 1 2017-01-01 15.91304 85.86957   2.743478       59.000
## 2 2017-01-02 18.50000 77.22222   2.894444     1018.278
## 3 2017-01-03 17.11111 81.88889   4.016667     1018.333
## 4 2017-01-04 18.70000 70.05000   4.545000     1015.700
## 5 2017-01-05 18.38889 74.94444   3.300000     1014.333
## 6 2017-01-06 19.31818 79.31818   8.681818     1011.773

# Visualization
# Plot the daily humidity readings as a time series.
#
# ts() expects `start` to be a single number or a c(year, position) pair.
# Passing a sequence of Dates does not attach dates to the observations: the
# Dates are coerced to day counts and only the first two elements are used,
# which yields a meaningless time axis.
#
# The data are daily and begin on 2017-01-01, so use 365 observations per
# year and start at day 1 of 2017; the x-axis is then in (decimal) years.
DailyDelhiClimateTest.ts <- ts(
  DailyDelhiClimateTest$humidity,
  start = c(2017, 1),
  frequency = 365
)
plot(DailyDelhiClimateTest.ts, type = "o", ylab = "humidity")

3. Reading data from TXT

# Way 1: read.csv() with ";" as the field separator.
# header = FALSE keeps the file's title line as the first data row and names
# the columns V1, V2, ...
household_power_consumption <- read.csv(
  file = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/household_power_consumption.txt",
  sep = ";",
  header = FALSE
)
head(household_power_consumption, n = 6L)

##           V1       V2                  V3                    V4      V5
## 1       Date     Time Global_active_power Global_reactive_power Voltage
## 2 16/12/2006 17:24:00               4.216                 0.418 234.840
## 3 16/12/2006 17:25:00               5.360                 0.436 233.630
## 4 16/12/2006 17:26:00               5.374                 0.498 233.290
## 5 16/12/2006 17:27:00               5.388                 0.502 233.740
## 6 16/12/2006 17:28:00               3.666                 0.528 235.680
##                 V6             V7             V8             V9
## 1 Global_intensity Sub_metering_1 Sub_metering_2 Sub_metering_3
## 2           18.400          0.000          1.000         17.000
## 3           23.000          0.000          1.000         16.000
## 4           23.000          0.000          2.000         17.000
## 5           23.000          0.000          1.000         17.000
## 6           15.800          0.000          1.000         17.000

# Way 2: read.delim() with ";" as the field separator.
# header = FALSE keeps the file's title line as the first data row and names
# the columns V1, V2, ...
household_power_consumption <- read.delim(
  file = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/household_power_consumption.txt",
  sep = ";",
  header = FALSE
)
head(household_power_consumption, n = 6L)

##           V1       V2                  V3                    V4      V5
## 1       Date     Time Global_active_power Global_reactive_power Voltage
## 2 16/12/2006 17:24:00               4.216                 0.418 234.840
## 3 16/12/2006 17:25:00               5.360                 0.436 233.630
## 4 16/12/2006 17:26:00               5.374                 0.498 233.290
## 5 16/12/2006 17:27:00               5.388                 0.502 233.740
## 6 16/12/2006 17:28:00               3.666                 0.528 235.680
##                 V6             V7             V8             V9
## 1 Global_intensity Sub_metering_1 Sub_metering_2 Sub_metering_3
## 2           18.400          0.000          1.000         17.000
## 3           23.000          0.000          1.000         16.000
## 4           23.000          0.000          2.000         17.000
## 5           23.000          0.000          1.000         17.000
## 6           15.800          0.000          1.000         17.000

# Visualization
# Plot the per-minute Voltage readings (column V5) as a time series.
#
# The file was read with header = FALSE, so the first element of V5 is the
# column title "Voltage" and the column is not numeric. Plotting it directly
# triggers "NAs introduced by coercion" warnings. Drop the title row and
# convert to numeric before building the series.
#
# ts() expects `start` to be a single number or a c(period, position) pair,
# not a sequence of Dates. The first reading is 16/12/2006 17:24:00 and the
# readings are one minute apart, so with 1440 observations per day the series
# starts at day 1, position 17 * 60 + 24 + 1 = 1045. The x-axis is in days
# counted from 2006-12-16.
household.ts <- local({
  voltage_raw <- as.character(household_power_consumption$V5)[-1]
  # NOTE(review): presumably missing readings are stored as "?" in this file
  # -- confirm. They are mapped to NA explicitly so that any other
  # non-numeric value still raises a coercion warning.
  voltage_raw[voltage_raw %in% "?"] <- NA
  ts(as.numeric(voltage_raw), start = c(1, 1045), frequency = 1440)
})
# Label the y-axis with the variable that is actually plotted.
plot(
  household.ts,
  type = "o",
  xlab = "Day (1 = 2006-12-16)",
  ylab = "Voltage"
)

## Warning in xy.coords(x, NULL, log = log, setLab = FALSE): NAs introduced by
## coercion

## Warning in xy.coords(x, y): NAs introduced by coercion

4. Reading data from SPSS

# Load the SPSS (.sav) copy of the air-quality data and preview its first
# rows. This reuses (and therefore overwrites) the AirQualityUCI name.
AirQualityUCI <- read_sav(
  file = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/AirQualityUCI.sav"
)
head(AirQualityUCI, n = 6L)

## # A tibble: 6 x 15
##   Date       Time     CO_GT PT08.S1_CO NMHC_GT C6H6_GT PT08.S2_NMHC NOx_GT PT08.S3_NOx
##   <chr>      <chr>    <dbl>      <dbl>   <dbl>   <dbl>        <dbl>  <dbl>       <dbl>
## 1 10/03/2004 18.00.00   2.6       1360     150    11.9         1046    166        1056
## 2 10/03/2004 19.00.00   2         1292     112     9.4          955    103        1174
## 3 10/03/2004 20.00.00   2.2       1402      88     9            939    131        1140
## 4 10/03/2004 21.00.00   2.2       1376      80     9.2          948    172        1092
## 5 10/03/2004 22.00.00   1.6       1272      51     6.5          836    131        1205
## 6 10/03/2004 23.00.00   1.2       1197      38     4.7          750     89        1337
## # ... with 6 more variables: NO2_GT <dbl>, PT08.S4_NO2 <dbl>, PT08.S5_O3 <dbl>,
## #   T <dbl>, RH <dbl>, AH <dbl>

5. Reading data from STATA

# Load the Stata (.dta) copy of the air-quality data and preview its first
# rows.
airqualitystata <- read_dta(
  file = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/airqualitystata.dta"
)
head(airqualitystata, n = 6L)

## # A tibble: 6 x 17
##   A      B     C     D     E     F     G     H     I     J     K     L     M    
##   <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 "Date" Time  CO_GT PT08~ NMHC~ C6H6~ PT08~ NO_x~ PT08~ NO2_~ PT08~ PT08~ T    
## 2 " 3/1~ 18:0~ 2.6   1360  150   11.8~ 1045~ 166   1056~ 113   1692  1267~ 13.5~
## 3 " 3/1~ 19:0~ 2     1292~ 112   9.39~ 954.~ 103   1173~ 92    1558~ 972.~ 13.2~
## 4 " 3/1~ 20:0~ 2.2   1402  88    8.99~ 939.~ 131   1140  114   1554~ 1074  11.9~
## 5 " 3/1~ 21:0~ 2.2   1375~ 80    9.22~ 948.~ 172   1092  122   1583~ 1203~ 11   
## 6 " 3/1~ 22:0~ 1.6   1272~ 51    6.51~ 835.5 131   1205  116   1490  1110  11.1~
## # ... with 4 more variables: N <chr>, O <chr>, P <dbl>, Q <dbl>

6. Reading data from JSON

# Parse the JSON metadata file, flatten the parsed result into a data frame,
# and print it.
bachelors_degree <- fromJSON(
  file = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/bachelors-degree-or-higher-5-year-estimate-in-los-angeles-county-ca_metadata.json"
)
bachelors_degree <- as.data.frame(x = bachelors_degree)
print(x = bachelors_degree)

##   realtime_start realtime_end       seriess.id seriess.realtime_start
## 1     2019-12-06   2019-12-06 HC01ESTVC1706037             2019-12-06
##   seriess.realtime_end
## 1           2019-12-06
##                                                             seriess.title
## 1 Bachelor's Degree or Higher (5-year estimate) in Los Angeles County, CA
##   seriess.observation_start seriess.observation_end seriess.frequency
## 1                2010-01-01              2017-01-01            Annual
##   seriess.frequency_short seriess.units seriess.units_short
## 1                       A       Percent                   %
##   seriess.seasonal_adjustment seriess.seasonal_adjustment_short
## 1     Not Seasonally Adjusted                               NSA
##     seriess.last_updated seriess.popularity
## 1 2019-02-27 13:42:02-06                 11
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             seriess.notes
## 1 Estimate of educational attainment for population 18 years old and over whose highest degree was a bachelor’s, master’s, or professional or doctorate degree. (ACS variable S1501_C02_015E from table S1501.)\n\nFor more information about the subject definitions, see: https://www.census.gov/programs-surveys/acs/technical-documentation/code-lists.html.\n\nMultiyear estimates from the American Community Survey (ACS) are "period" estimates derived from a data sample collected over a period of time, as opposed to "point-in-time" estimates such as those from past decennial censuses. ACS 5-year estimates include data collected over a 60-month period. The date associated with the data is the end of the 5-year period. For example, a value dated 2014 represents data from 2010 to 2014. However, the value does not describe any specific day, month, or year within that time period.\n\nMultiyear estimates require some additional considerations. For example, multiyear estimates released in consecutive years consist mostly of overlapping years and shared data. The 2010-2014 ACS 5-year estimates share sample data from 2011 through 2014 with the 2011-2015 ACS 5-year estimates. Because of this overlap, users should use extreme caution in making comparisons with consecutive years of multiyear estimates.\n\nPlease see the ACS handbook (Section 3, "Understanding and Using ACS Single-Year and Multiyear Estimates," p. 13) for a comprehensive set of details and clarifications: https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018.pdf

# Show the compact structure (column names, types, first values) of the
# metadata data frame.
str(object = bachelors_degree)

## 'data.frame':    1 obs. of  17 variables:
##  $ realtime_start                   : chr "2019-12-06"
##  $ realtime_end                     : chr "2019-12-06"
##  $ seriess.id                       : chr "HC01ESTVC1706037"
##  $ seriess.realtime_start           : chr "2019-12-06"
##  $ seriess.realtime_end             : chr "2019-12-06"
##  $ seriess.title                    : chr "Bachelor's Degree or Higher (5-year estimate) in Los Angeles County, CA"
##  $ seriess.observation_start        : chr "2010-01-01"
##  $ seriess.observation_end          : chr "2017-01-01"
##  $ seriess.frequency                : chr "Annual"
##  $ seriess.frequency_short          : chr "A"
##  $ seriess.units                    : chr "Percent"
##  $ seriess.units_short              : chr "%"
##  $ seriess.seasonal_adjustment      : chr "Not Seasonally Adjusted"
##  $ seriess.seasonal_adjustment_short: chr "NSA"
##  $ seriess.last_updated             : chr "2019-02-27 13:42:02-06"
##  $ seriess.popularity               : num 11
##  $ seriess.notes                    : chr "Estimate of educational attainment for population 18 years old and over whose highest degree was a bachelor’s, "| __truncated__

7. Reading data from Markdown (MD)

# Read the Markdown file as a character vector (one element per line) and
# preview the first lines.
mydata <- readLines(
  con = "D:/S2/IPB/STA542 Analisis Deret Waktu/1/Res/README.md"
)
head(mydata, n = 6L)

## [1] "# COVID-19-Community-Mobility-Dataset"                                                                                                                                                                                                                                                      
## [2] "The Dataset has been extracted from google https://www.google.com/covid19/mobility/ for 120+ countries and have been tried to look into deep inside the mobility data. This data may help to understand community mobility inpacts on infection rate both countrywise as well as worldwide."
## [3] "File:"                                                                                                                                                                                                                                                                                      
## [4] "~~~~~"                                                                                                                                                                                                                                                                                      
## [5] "community_dataset_with_infection_count.csv"                                                                                                                                                                                                                                                 
## [6] "____________________________________________"