Data Preprocessing¶
We preprocess and resample the raw data .txt files we downloaded earlier into numpy.
Resample 10Hz and 100Hz data to 1Hz¶
[1]:
import numpy as np
data_path = "Dataset/ZEMA_Hydraulic/"
filenames_input_data_10Hz = ["fs1","fs2"]
filenames_input_data_10Hz = [file + ".txt" for file in filenames_input_data_10Hz]
filenames_input_data_100Hz = ["ps1","ps2","ps3","ps4","ps5","ps6","eps1"]
filenames_input_data_100Hz = [file + ".txt" for file in filenames_input_data_100Hz]
data_input_data_10Hz = np.zeros((2205,600,len(filenames_input_data_10Hz)))
data_input_data_100Hz = np.zeros((2205,6000,len(filenames_input_data_100Hz)))
for id_,file_name in enumerate(filenames_input_data_10Hz):
input_data = np.loadtxt(data_path + file_name, delimiter = "\t")
data_input_data_10Hz[:,:,id_] = input_data.copy()
for id_,file_name in enumerate(filenames_input_data_100Hz):
input_data = np.loadtxt(data_path + file_name, delimiter = "\t")
data_input_data_100Hz[:,:,id_] = input_data.copy()
filenames_input_data_10Hz_resampled = ["res_"+file for file in filenames_input_data_10Hz]
filenames_input_data_100Hz_resampled = ["res_"+file for file in filenames_input_data_100Hz]
#resample 10Hz
resample = np.linspace(0,600-1, num =60,dtype="int")
data_resampled_10Hz=data_input_data_10Hz[:,resample,:]
#resample 100Hz
resample = np.linspace(0,5999, num =60,dtype="int")
data_resampled_100Hz=data_input_data_100Hz[:,resample,:]
#save file
for id_,file_name in enumerate(filenames_input_data_10Hz_resampled):
np.savetxt(data_path+file_name,data_resampled_10Hz[:,:,id_],delimiter='\t')
for id_,file_name in enumerate(filenames_input_data_100Hz_resampled):
np.savetxt(data_path+file_name,data_resampled_100Hz[:,:,id_],delimiter='\t')
Load all the 1Hz data¶
Load all data including the resampled sensors into numpy arrays
[2]:
#save data
datarows = 2205
seq_length = 60
#deal with inputs data
filenames_input_data_1Hz = ["ts1","ts2","ts3","ts4","vs1","se","res_fs1","res_fs2","res_ps1","res_ps2","res_ps3","res_ps4","res_ps5","res_ps6","res_eps1","ce","cp"]
filenames_input_data_1Hz = [file + ".txt" for file in filenames_input_data_1Hz]
filename_target_data = "profile.txt"
data_input_data_1Hz = np.zeros((datarows,seq_length,len(filenames_input_data_1Hz)))
for id_,file_name in enumerate(filenames_input_data_1Hz):
input_data = np.loadtxt(data_path + file_name, delimiter = "\t")
data_input_data_1Hz[:,:,id_] = input_data.copy()
Load the target multi-target, multi-class output data¶
We load them and preprocess into one hot vector
[3]:
#deal with output data now
targets_data = np.loadtxt(data_path+filename_target_data, delimiter = "\t")
#conversion of outputs to one hot
def makeOneHotVectorMap(length):
map_toOneHot ={}
for i in range(length):
oneHot = np.zeros(length)
oneHot[i] = 1
map_toOneHot[i] = oneHot
return map_toOneHot
id2x_dictionaries = []
x2id_dictionaries = []
id2onehot_dictionaries = []
for label in range(targets_data.shape[1]):
label_column = list(set(targets_data[:,label]))
label_column.sort(reverse=True)
id2x_dictionary = {}
x2id_dictionary = {}
id2onehot_dictionary = makeOneHotVectorMap(len(label_column))
for i in range(len(label_column)):
id2x_dictionary[i] = label_column[i]
x2id_dictionary[label_column[i]] = i
id2x_dictionaries+=[id2x_dictionary]
x2id_dictionaries+=[x2id_dictionary]
id2onehot_dictionaries+=[id2onehot_dictionary]
#convert a row into one-hot coded multi-class multi-label
onehot_tensor_output = []
id_output =[]
for row in range(targets_data.shape[0]):
row_output_data= targets_data[row]
onehots_row =[]
id_row =[]
for label in range(row_output_data.shape[0]):
id_ = x2id_dictionaries[label][row_output_data[label]]
onehot= id2onehot_dictionaries[label][id_]
onehots_row =np.append(onehots_row,onehot)
id_row = np.append(id_row,id_)
id_output+=[id_row]
onehot_tensor_output += [onehots_row]
onehot_tensor_output = np.array(onehot_tensor_output)
id_tensor_output = np.array(id_output)
tensor_output = id_tensor_output
all_tensor_output = id_tensor_output
Pickle data¶
[5]:
import os
import pickle
pickle_folder= "pickles"
if os.path.exists(pickle_folder) == False:
os.mkdir(pickle_folder)
#Pickle them
pickle.dump(data_input_data_1Hz, open( pickle_folder+"/data_input_data_1Hz_full.p", "wb" ) )
pickle.dump(data_input_data_10Hz, open( pickle_folder+"/data_input_data_10Hz.p", "wb" ) )
pickle.dump(data_input_data_100Hz, open( pickle_folder+"/data_input_data_100Hz.p", "wb" ) )
pickle.dump(id2onehot_dictionaries, open( pickle_folder+"/id2onehot_dictionaries.p", "wb" ) )
pickle.dump(all_tensor_output, open( pickle_folder+"/zema_outputs.p", "wb" ) )
[ ]: