As a follow on to my previous post, which was about appending data, the script below prepends historical data to an assumed existing data record.
# Prepend older hourly OHLC bars to each instrument's existing data file.
#
# For every instrument listed in "instrument_hourly_update_file" the script
#   1. reads the existing "<instrument>_raw_OHLC_hourly" record,
#   2. downloads roughly 40 weeks of hourly bars ending just after the first
#      date/time already on file (via HisPricesDates),
#   3. drops the downloaded rows that overlap the existing record,
#   4. writes the combined record back to the same file, and
#   5. adds the number of prepended rows to column 3 of the update file.
#
# HisPricesDates, DayAlign, TimeAlign and AccountToken are not defined here;
# they must already exist in the R session.

# cd to the hourly data directory
setwd("~/Documents/octave/oanda_data/hourly")

all_current_historical_data_list <- read.table(
  "instrument_hourly_update_file",
  header = FALSE, sep = "", colClasses = c("character", "Date", "numeric")
)

# seq_len() rather than 1:nrow() so that an empty update file means zero
# iterations instead of iterating over c(1, 0)
for (ii in seq_len(nrow(all_current_historical_data_list))) {

  instrument <- all_current_historical_data_list[ii, 1]
  ohlc_file <- paste(instrument, "raw_OHLC_hourly", sep = "_")

  current_ohlc_record <- read.table(
    file = ohlc_file, header = FALSE, na.strings = "", sep = ",",
    stringsAsFactors = FALSE
  )

  # the date/time value to be matched in the newly downloaded data
  current_ohlc_record_begin_date_time <- as.character(current_ohlc_record[1, 1])

  # the end date for new data to be downloaded
  last_date_ix <- as.Date(current_ohlc_record[1, 1])

  # last 40 weeks of hourly data approx = 5000 hourly bars
  # the begin date for new data to be downloaded
  begin_date_ix <- as.Date(last_date_ix - 280)

  # download the missing historical data from begin_date_ix to last_date_ix;
  # +2 to ensure that the end of the new downloaded data will overlap with the
  # beginning of current_ohlc_record
  new_historical_data <- HisPricesDates(
    Granularity = "H1", DayAlign, TimeAlign, AccountToken, instrument,
    begin_date_ix, last_date_ix + 2
  )

  # nothing came back (e.g. reaching further back than data is available):
  # leave this instrument untouched rather than failing further down
  if (NROW(new_historical_data) == 0L) {
    warning("No historical data downloaded for ", instrument,
            "; record left unchanged", call. = FALSE)
    next
  }

  # having ensured no data is missed by overlapping with current_ohlc_record,
  # delete duplicated OHLC information
  # vector to search for the above date value
  new_historical_data_date_times <- as.character(new_historical_data[, 1])
  # get the matching index value
  ix <- charmatch(current_ohlc_record_begin_date_time,
                  new_historical_data_date_times)

  # charmatch() gives NA when nothing matches and 0 when the match is
  # ambiguous. NA would make ix:nrow() throw, and 0 would silently discard
  # every downloaded row, so skip the instrument in both cases.
  if (is.na(ix) || ix < 1L) {
    warning("Could not locate ", current_ohlc_record_begin_date_time,
            " in the data downloaded for ", instrument,
            "; record left unchanged", call. = FALSE)
    next
  }

  # delete that part of new_historical_data which is already contained in
  # the existing data file
  new_historical_data <-
    new_historical_data[-(ix:nrow(new_historical_data)), ]

  # before prepending new_historical_data in front of current_ohlc_record,
  # need to give names to current_ohlc_record as rbind needs to bind by named
  # attributes
  names(current_ohlc_record) <- names(new_historical_data)

  # see https://stackoverflow.com/questions/11785710/rbind-function-changing-my-entries
  # for reason for following: also need to coerce the dates in
  # new_historical_data from POSIXct to character
  new_historical_data$TimeStamp <- as.character(new_historical_data$TimeStamp)

  # and now prepend new_historical_data to current_ohlc_record
  combined_records <- rbind(new_historical_data, current_ohlc_record,
                            stringsAsFactors = FALSE)

  # and coerce character dates back to a POSIXct date format prior to printing
  combined_records$TimeStamp <- as.POSIXct(combined_records$TimeStamp)

  # write combined_records to file
  write.table(combined_records, file = ohlc_file, row.names = FALSE,
              col.names = FALSE, sep = ",")

  # length of added new data
  added_data_length <- nrow(new_historical_data)

  # and amend Instrument_update file with latest update information
  all_current_historical_data_list[ii, 3] <-
    all_current_historical_data_list[ii, 3] + added_data_length

  # write updated Instrument_update_file to file (done inside the loop, so the
  # file on disk is refreshed after every instrument that is processed)
  write.table(all_current_historical_data_list,
              file = "instrument_hourly_update_file",
              row.names = FALSE, col.names = FALSE)

} # end of for all_current_historical_data_list loop
As described in the previous post, the function HisPricesDates is called to do the actual downloading, with the relevant dates for function input being read and calculated from the existing data file (I have hard-coded this for hourly data, but this can, of course, be changed or implemented as user input in the R session). As usual, I have commented the script to explain what is going on.
However, one important caveat is that it is assumed that there is actually some Oanda data to download and prepend prior to the earliest date in the existing record, and there are no checks of this assumption. Therefore, the script might fail in unexpected ways if one attempts to reach too far back in history for the prependable data.