#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 29 16:04:01 2017

@author: Oana
@email: oana.dumitru@nyu.edu

Reads the 'IBI Series' sheet of every .xlsx workbook in a directory,
resamples each of its first six columns to one value per second, averages
those values in fixed-size windows and writes one text file per column.
"""
import glob
import os

##############################################################################
##############################################################################
# Function Definitions

# Directory scanned for workbooks when the script is run directly.
DEFAULT_DATA_DIR = '/Users/Oana/Documents/PGA Resample'

# Number of leading columns of the 'IBI Series' sheet that are processed.
_SERIES_COUNT = 6

# Textual form of an empty cell.  "empty:u''" is what the original script
# matched (Python 2 repr); "empty:''" is the same cell rendered on Python 3.
_EMPTY_MARKERS = ("empty:u''", "empty:''")

# Number of leading characters dropped before parsing a cell string as a
# float (length of a prefix such as "number:").
_CELL_PREFIX_LEN = 7

# Width of one resampling bin.  The code calls the bins "seconds", so the
# input is presumably in milliseconds -- TODO confirm against the data.
_BIN_WIDTH = 1000


def take_excel(column, ibi_series, source_sheet=None):
    """Append the string form of every cell in *column* to *ibi_series*.

    Row 0 is skipped (rows are read from index 1 up to ``nrows``).

    Args:
        column: zero-based column index to read.
        ibi_series: list that receives one string per row; mutated in place.
        source_sheet: object exposing ``nrows`` and ``cell(row, column)``.
            When omitted, the module-level name ``sheet`` is used, which is
            how the function was originally called.

    Returns:
        The same *ibi_series* list.
    """
    if source_sheet is None:
        # Backward-compatible fallback to the historical global.
        source_sheet = sheet  # noqa: F821
    for row in range(1, source_sheet.nrows):
        try:
            ibi_series.append(str(source_sheet.cell(row, column)))
        except Exception:
            # Best effort, as before: an unreadable cell becomes 'NaN'.
            # Unlike a bare ``except`` this lets KeyboardInterrupt and
            # SystemExit propagate.
            ibi_series.append('NaN')
    return ibi_series


def remove_empty(ibi_series):
    """Drop empty-cell markers from *ibi_series* in place and return it.

    A single pass replaces the original repeated ``in`` / ``remove`` scan,
    which was quadratic in the list length.
    """
    ibi_series[:] = [item for item in ibi_series
                     if item not in _EMPTY_MARKERS]
    return ibi_series


def turn_to_integer(ibi_series):
    """Convert every cell string in *ibi_series* to a float, in place.

    The first ``_CELL_PREFIX_LEN`` characters of each entry are discarded
    and the remainder is parsed with ``float``.  Entries that cannot be
    parsed (including the 'NaN' placeholder) are replaced by ``0``.

    Despite the name, the stored values are floats.  Returns ``None``.
    """
    for index, cell_text in enumerate(ibi_series):
        try:
            ibi_series[index] = float(cell_text[_CELL_PREFIX_LEN:])
        except (TypeError, ValueError):
            ibi_series[index] = 0


def _cumulative_beat_times(series):
    """Return the running totals of *series* as a new list."""
    beat_times = list(series)  # a real copy: the caller's list is untouched
    for index in range(1, len(beat_times)):
        beat_times[index] += float(beat_times[index - 1])
    return beat_times


def _per_second_ibi(beat_times):
    """Turn cumulative beat times into one weighted interval per bin.

    This is the original resampling loop with its three branches kept
    intact; only the duplicated sub-expressions were hoisted.
    """
    ibi = []
    count = len(beat_times)
    i = 0
    while i != count:
        current = beat_times[i]
        second = current // _BIN_WIDTH
        # make sure there is a previous second
        previous_second = beat_times[i - 1] // _BIN_WIDTH if i != 0 else 0
        # Interval that ends at the current beat; the first beat is
        # measured from time zero.
        interval_here = current if i == 0 else current - beat_times[i - 1]
        # True when the current beat landed two bins after the previous
        # one; the raw interval is then emitted once more to fill the gap.
        jumped_a_second = i != 0 and second == previous_second + 2

        # if the first beat jumps through second 0, make second 0 equal to
        # the beat
        if i == 0 and second == 1:
            ibi.append(current)

        if i == count - 1:
            # Last beat.  For i == 0 the index i - 1 wraps to the same
            # element, so the comparison below is False and the single
            # beat itself is emitted.
            if second != beat_times[i - 1] // _BIN_WIDTH:
                if jumped_a_second:
                    ibi.append(interval_here)
                ibi.append(current - beat_times[i - 1])
            else:
                # NOTE(review): for i > 0 this appends the cumulative time
                # rather than an interval (reachable when three beats fall
                # in one bin) -- behaviour kept as in the original; confirm
                # whether that is intended.
                ibi.append(current)
            break

        following = beat_times[i + 1]
        interval_next = following - current
        # Fraction of the bin that elapsed before the current beat.
        weight_here = (current - second * _BIN_WIDTH) / float(_BIN_WIDTH)

        if following // _BIN_WIDTH == second:
            # The next beat falls into the same bin: blend three intervals
            # and consume both beats.
            weight_next = interval_next / float(_BIN_WIDTH)
            weight_after = (((second + 1) * _BIN_WIDTH - following)
                            / float(_BIN_WIDTH))
            # if on the second to last beat, consider the beat after the
            # next one as 0
            if i == count - 2:
                interval_after = 0
            else:
                interval_after = beat_times[i + 2] - following
            value = (weight_here * interval_here
                     + weight_next * interval_next
                     + weight_after * interval_after)
            step = 2
        else:
            # The next beat is in a later bin: blend two intervals.
            weight_next = 1 - weight_here
            value = (weight_here * interval_here
                     + weight_next * interval_next)
            step = 1

        if jumped_a_second:
            ibi.append(interval_here)
        ibi.append(value)
        i += step
    return ibi


def _window_means(values, window):
    """Return the mean of each complete run of *window* values."""
    usable = len(values) - len(values) % window
    means = []
    for start in range(0, usable, window):
        # Plain left-to-right addition, matching the original expression.
        total = values[start]
        for value in values[start + 1:start + window]:
            total += value
        means.append(total / float(window))
    return means


def downsample(series, x, filename, window=5):
    """Resample *series* per second, average in windows, write a text file.

    The output file is named after *filename* with its extension removed
    and ``str(x) + "_IBI.txt"`` appended; it receives one mean per line.
    Values left over after the last complete window are discarded, and in
    that case the output file name is printed.

    Args:
        series: list of numeric intervals.  It is no longer modified: the
            original aliased it while intending to copy, overwriting the
            caller's list with cumulative sums.
        x: label inserted into the output file name.
        filename: path of the source workbook.
        window: number of per-second values averaged per output line
            (default 5, the value previously hard-coded).

    Raises:
        ValueError: if *window* is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    out_name = os.path.splitext(filename)[0] + str(x) + "_IBI" + ".txt"
    per_second = _per_second_ibi(_cumulative_beat_times(series))

    # if it's not divisible by the window, the last piece of data is dropped
    if len(per_second) % window != 0:
        print(out_name)

    # ``with`` guarantees the file is closed even if writing fails.
    with open(out_name, 'w') as out_file:
        out_file.writelines(str(mean) + "\n"
                            for mean in _window_means(per_second, window))


##############################################################################
# Program Code

def main(data_dir=DEFAULT_DATA_DIR):
    """Process every .xlsx workbook found directly inside *data_dir*."""
    # Third-party dependency, imported lazily so the helpers above can be
    # used without xlrd installed.
    from xlrd import open_workbook

    for filename in glob.glob(os.path.join(data_dir, '*.xlsx')):
        wb = open_workbook(filename)
        # open the IBI series sheet
        ibi_sheet = wb.sheet_by_name('IBI Series')

        # The six series used to be handled by six copies of each call; the
        # original note pointed to the companion script "resampling_task"
        # for a loop-based version, which is what is done here now.
        for column in range(_SERIES_COUNT):
            ibi = take_excel(column, [], ibi_sheet)
            remove_empty(ibi)        # remove empty values
            turn_to_integer(ibi)     # turn the strings into numbers
            downsample(ibi, column + 1, filename)


if __name__ == '__main__':
    main()