# need to use your own file name
# also check how many rows you need to skip reading on top
file_name <- 'use_your_file_name'
skip_r <- 13
output_name <- 'SCE_home1.csv'
temp <- read.csv(file_name, skip = skip_r)
colnames(temp) <- c('date_time','usage','remark')
# adding a remark column to denote if the energy is "delivered / DLVR"
# - meaning from SCE / grid, or "received / RCV" - meaning to SCE / grid.
# need to figure out how to change column names in line to date_time, usage
# and remark instead of changing them after read.csv
temp$usage <- as.numeric(temp$usage)
# convert the usage to numeric instead of char
temp$date_time <- substr(temp$date_time,1,19)
# remove the "to" portion of the date_time
temp_df <- temp %>%
filter(!grepl('Data', date_time))
# keep only the data that we need - which is rows with date/time info
# and where remark portion shows "Reading quality"
# I'm keeping the "Reading quality" because it is also the starting point
# for the delivered / received header.
# set up iterators
# max_loop is the total number of rows
# tracking is to determine if it's DLVR (even) or RCV (odd)
max_loop <- nrow(temp_df)
n <- 1
track <- 0
CURR <- 'DLVR'
while (n < max_loop) {
while (temp_df$remark[n] != "Reading quality") {
temp_df$remark[n] = CURR
n <- n+1
}
n<-n+1
track <- track+1
if (track %% 2 == 0)
{ CURR <- 'DLVR'}
else
{ CURR <- 'RCV'}
}
# remove all data rows in date_time column that doesn't contact 202*
# which is the starting year.
temp_df <- temp_df %>%
filter(grepl('202', date_time))
temp_df$date_time <- as.POSIXlt(temp_df$date_time)
write.csv(temp_df,output_name,row.names=FALSE)
No comments:
Post a Comment