#!/usr/bin/env python
"""Convert a flight-recorder .mat file into a CSV table sampled at 1 Hz.

Every top-level variable of the .mat file is treated as one feature: an
entry exposing a ``data`` column vector and a ``Rate`` (samples per
second).  Features sampled at 1 Hz or faster are subsampled to 1 Hz and
written, next to a generated timestamp column, to ``<input name>.csv``.
"""
import csv
import datetime
import sys

import numpy as np
from numpy.lib import recfunctions
from scipy import io

# The conversion itself no longer needs matplotlib (the mlab record helpers
# it used to call were removed in matplotlib 3.1), so a missing install must
# not prevent the script from running.  The names stay bound for any
# plotting code that expects them.
try:
    import matplotlib as mp
    import matplotlib.pyplot as plt
except ImportError:
    mp = None
    plt = None

# Recording converted when no path is given on the command line.
# Other sample recordings: '670200106231735.mat', '652200101092009',
# '653201001031841'.
DEFAULT_MAT_FILE = '670200106081534.mat'


def _rate_of(raw_data, key):
    """Return the sampling rate stored in the ``Rate`` field of *key*."""
    return raw_data[key]['Rate'][0][0][0][0]


def open_mat_file(file_name):
    """Open a flight .mat file and return it as a python dictionary.

    Args:
        file_name: path of the .mat file to read.

    Returns:
        The dictionary produced by ``scipy.io.loadmat``.
    """
    return io.loadmat(file_name)


def extract_features(raw_data):
    """Return the names of the features that can be exported at 1 Hz.

    The feature names are the keys of the raw dictionary.  Two groups of
    keys are left out:

    * keys starting with ``__``: ``loadmat`` adds ``__header__``,
      ``__version__`` and ``__globals__``, which are file metadata rather
      than blackbox data (presumably the three entries the script used to
      delete by position);
    * features sampled below 1 Hz, which are treated as static and
      non-relevant.

    For the sample recording this takes 189 keys down to 186 and finally
    to 163 features.

    Args:
        raw_data: dictionary returned by ``open_mat_file``.

    Returns:
        List of feature names, in the iteration order of ``raw_data``.
    """
    features_scrubbed = []
    for key in raw_data:
        if key.startswith('__'):
            continue
        if _rate_of(raw_data, key) < 1:
            # Sample rate < 1 Hz: would need interpolation, skipped for now.
            continue
        features_scrubbed.append(key)
    return features_scrubbed


def _base_timestamp(raw_data):
    """Build the datetime of the first record from the DATE_*/GMT_* features."""
    parts = [
        int(raw_data[name]['data'][0][0][0, 0])
        for name in ('DATE_YEAR', 'DATE_MONTH', 'DATE_DAY',
                     'GMT_HOUR', 'GMT_MINUTE', 'GMT_SEC')
    ]
    return datetime.datetime(*parts)


def feature_vector_array(raw_data, features_scrubbed):
    """Create a recarray holding every selected feature at 1 Hz.

    The first column, ``timestamp``, starts at the date/time of the first
    record in the file and advances by one second per row.  Every other
    column is one feature, normalized to 1 Hz by keeping every ``Rate``-th
    sample of its original vector.

    The number of rows is the length of the ``SAT`` feature (assumed to be
    recorded at 1 Hz -- TODO confirm for other recordings).

    Args:
        raw_data: dictionary returned by ``open_mat_file``.
        features_scrubbed: feature names to export, e.g. the result of
            ``extract_features``.

    Returns:
        ``numpy.recarray`` with one ``timestamp`` field followed by one
        field per feature, each keeping the dtype of its source data.

    Raises:
        ValueError: if a feature's rate is not a whole number >= 1, or if
            its subsampled vector does not have one value per row.
    """
    t_length = len(raw_data['SAT']['data'][0][0])

    # Subsample every feature first so the table can be allocated once
    # with its final layout instead of being copied per appended column.
    columns = []
    for key in features_scrubbed:
        samples = raw_data[key]['data'][0][0][:, 0]
        rate = _rate_of(raw_data, key)
        step = int(rate)
        if step < 1 or step != rate:
            raise ValueError(
                "feature %r has rate %r; a whole number >= 1 is required"
                % (key, rate))
        column = samples[::step]
        if column.shape[0] != t_length:
            raise ValueError(
                "feature %r yields %d samples at 1Hz, expected %d"
                % (key, column.shape[0], t_length))
        columns.append((key, column))

    layout = [('timestamp', np.str_, 20)]
    layout.extend((key, column.dtype) for key, column in columns)
    arr = np.zeros(t_length, dtype=layout).view(np.recarray)

    # One timestamp per second, counted from the first record in the file.
    base = _base_timestamp(raw_data)
    arr['timestamp'] = [
        str(base + datetime.timedelta(seconds=offset))
        for offset in range(t_length)
    ]
    for key, column in columns:
        arr[key] = column
    return arr


def write_csv_output(arr, file_name):
    """Write *arr* as CSV to ``file_name + '.csv'``.

    The first row holds the field names; every following row holds one
    record, each value rendered with NumPy's string conversion.

    Args:
        arr: structured array / recarray to export.
        file_name: output path without the ``.csv`` suffix.
    """
    out_path = file_name + '.csv'
    names = arr.dtype.names
    columns = [np.asarray(arr[name]).astype(str) for name in names]

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(out_path, 'wb')
    else:
        handle = open(out_path, 'w', newline='')
    with handle:
        writer = csv.writer(handle)
        writer.writerow(names)
        writer.writerows(zip(*columns))


def main(argv=None):
    """Convert the .mat file named on the command line (or the default one)."""
    args = sys.argv[1:] if argv is None else argv
    file_name = args[0] if args else DEFAULT_MAT_FILE
    raw_mat_data_dict = open_mat_file(file_name)
    features_scrubbed = extract_features(raw_mat_data_dict)
    array_to_write = feature_vector_array(raw_mat_data_dict, features_scrubbed)
    write_csv_output(array_to_write, file_name)


if __name__ == "__main__":
    main()