R script to import SCE's CSV with NEM

 # need to use your own file name

# also check how many rows you need to skip reading on top


file_name <- 'use_your_file_name'

skip_r <- 13

output_name <- 'SCE_home1.csv'


temp <- read.csv(file_name, skip = skip_r)

colnames(temp) <- c('date_time','usage','remark')


# adding a remark column to denote if the energy is "delivered / DLVR"

# - meaning from SCE / grid, or "received / RCV" - meaning to SCE / grid.

# need to figure out how to change column names in line to date_time, usage

# and remark instead of changing them after read.csv


temp$usage <- as.numeric(temp$usage)


# convert the usage to numeric instead of char


temp$date_time <- substr(temp$date_time,1,19)


# remove the "to" portion of the date_time


temp_df <- temp %>%

  filter(!grepl('Data', date_time))


# keep only the data that we need - which is rows with date/time info

# and where remark portion shows "Reading quality"

# I'm keeping the "Reading quality" because it is also the starting point

# for the delivered / received header.


# set up iterators 

# max_loop is the total number of rows

# tracking is to determine if it's DLVR (even) or RCV (odd)


max_loop <- nrow(temp_df)

n <- 1

track <- 0

CURR <- 'DLVR'

while (n < max_loop) {

  while (temp_df$remark[n] != "Reading quality") {

    temp_df$remark[n] = CURR

    n <- n+1

    }

  n<-n+1

  track <- track+1

  if (track %% 2 == 0) 

  { CURR <- 'DLVR'} 

  else 

    { CURR <- 'RCV'}

}


# remove all data rows in date_time column that doesn't contact 202*

# which is the starting year.


temp_df <- temp_df %>%

  filter(grepl('202', date_time))


temp_df$date_time <- as.POSIXlt(temp_df$date_time)


write.csv(temp_df,output_name,row.names=FALSE)


No comments:

Post a Comment

Google Data Analytics capstone project

Google Data Analytics capstone project

I decided to use blog as the venue to publish my Google Data Analytics learning because I wasn't sure how I can upload the findings, the...