#!/usr/bin/env python 

from scipy import io
import numpy as np
import csv
import matplotlib as mp
import matplotlib.pyplot as plt
from numpy.lib import recfunctions
import datetime


# Open flight .mat file as a python dictionary
def open_mat_file(file_name):
    """Load the MATLAB file at *file_name* and return what ``io.loadmat`` yields.

    The result is the dictionary produced by ``scipy.io.loadmat``, keyed by
    the variable names stored in the file.
    """
    return io.loadmat(file_name)


# Extract the names of the features (i.e. the keys in the raw python dictionary)
# starting point: 189 features
# Delete 3 specific keys since they are metadata and not associated with the blackbox data
# intermediate total: 186
# Delete all features that have resolution < 1Hz since these are static and not relevant
# Final total: 163
def extract_features(raw_data):
    """Return the names of the features sampled at 1 Hz or faster.

    Two kinds of keys are dropped from *raw_data*:

    * the bookkeeping entries that ``scipy.io.loadmat`` adds to every result
      (``__header__``, ``__version__``, ``__globals__``).  They are recognised
      by their leading double underscore rather than by list position, so the
      result does not depend on dictionary ordering and the function works on
      Python 3, where ``dict.keys()`` is a view that does not support ``del``.
    * every feature whose ``Rate`` is below 1 Hz (0.25 Hz in the data this
      script was written for).

    Parameters
    ----------
    raw_data : mapping
        Dictionary as returned by ``open_mat_file``.  Every non-metadata
        entry must expose its sample rate at ``entry['Rate'][0][0][0][0]``.

    Returns
    -------
    list
        Names of the retained features, in the iteration order of *raw_data*.
    """
    features_scrubbed = []
    for key in raw_data:
        # loadmat metadata entries carry no 'Rate'/'data' fields; skip them.
        if key.startswith('__'):
            continue
        rate = raw_data[key]['Rate'][0][0][0][0]
        # Sub-1Hz channels cannot be subsampled down to 1 Hz, so leave them out.
        if rate < 1:
            continue
        features_scrubbed.append(key)
    return features_scrubbed


# Create recarray with feature vectors. Normalize all feature data vectors to fit the 1Hz resolution
def feature_vector_array(raw_data, features_scrubbed):
    """Build a record array with a timestamp column and one column per feature.

    Each feature listed in *features_scrubbed* is subsampled by taking every
    ``rate``-th sample, which brings it down to one value per second.  The
    number of rows equals the number of samples of the ``'SAT'`` entry
    (presumably a 1 Hz channel -- confirm against the data set).

    The whole array is allocated once with its final dtype instead of growing
    it column by column; the previous implementation relied on
    ``matplotlib.mlab.rec_append_fields``, which has been removed from
    matplotlib, and copied the array once per feature.

    Parameters
    ----------
    raw_data : mapping
        Dictionary as returned by ``open_mat_file``.  It must contain the
        entries ``SAT``, ``DATE_YEAR``, ``DATE_MONTH``, ``DATE_DAY``,
        ``GMT_HOUR``, ``GMT_MINUTE`` and ``GMT_SEC`` in addition to the
        requested features.
    features_scrubbed : iterable of str
        Feature names to include, e.g. the result of ``extract_features``.

    Returns
    -------
    numpy.recarray
        Field ``timestamp`` holds ``'YYYY-MM-DD HH:MM:SS'`` strings that start
        at the first recorded date/time and advance by one second per row;
        every other field keeps the dtype of its source data.

    Raises
    ------
    ValueError
        If a feature's rate is not a whole number >= 1, or (raised by numpy)
        if a subsampled column does not fit the number of rows.
    """
    t_length = len(raw_data['SAT']['data'][0][0])

    # Subsample every feature first so that the final dtype is known up front.
    columns = []
    for key in features_scrubbed:
        samples = raw_data[key]['data'][0][0][:, 0]
        rate = raw_data[key]['Rate'][0][0][0][0]
        # The rate may arrive as a float (e.g. 4.0); a slice step must be an int.
        step = int(rate)
        if step < 1 or step != rate:
            raise ValueError(
                'feature %s has sample rate %s; expected a whole number >= 1'
                % (key, rate))
        columns.append((key, samples[::step]))

    dtype = [('timestamp', np.str_, 20)]
    dtype.extend((key, column.dtype) for key, column in columns)
    arr = np.zeros(t_length, dtype=dtype).view(np.recarray)

    # Base timestamp = first record of each date/time channel.  The values are
    # cast to int because datetime.datetime rejects floating-point arguments.
    base_fields = ('DATE_YEAR', 'DATE_MONTH', 'DATE_DAY',
                   'GMT_HOUR', 'GMT_MINUTE', 'GMT_SEC')
    base = datetime.datetime(
        *[int(raw_data[name]['data'][0][0][0, 0]) for name in base_fields])
    arr['timestamp'] = [str(base + datetime.timedelta(seconds=offset))
                        for offset in range(t_length)]

    for key, column in columns:
        arr[key] = column

    return arr


def write_csv_output(arr, file_name):
    """Write the structured array *arr* to ``file_name + '.csv'``.

    The first row holds the field names of *arr*; every following row holds
    one record, with each value rendered through ``str()``.

    The standard-library ``csv`` module is used because
    ``matplotlib.mlab.rec2csv``, which this function used to call, has been
    removed from matplotlib.

    Parameters
    ----------
    arr : numpy structured array or recarray
        Table to write; must have named fields.
    file_name : str
        Output path without the ``.csv`` suffix, which is appended here.
    """
    names = arr.dtype.names
    columns = [arr[name] for name in names]
    # The handle is opened in plain text mode, so an explicit '\n' terminator
    # is used to avoid doubled carriage returns where text mode already
    # translates newlines.
    with open(file_name + '.csv', 'w') as handle:
        writer = csv.writer(handle, lineterminator='\n')
        writer.writerow(names)
        for row in zip(*columns):
            writer.writerow([str(value) for value in row])


if __name__ == "__main__":
    import sys

    # Input .mat file: first command-line argument when given, otherwise the
    # sample flight the script was originally hard-wired to.
    # Other sample flights that were tried:
    # '670200106231735.mat', '652200101092009', '653201001031841'
    file_name = sys.argv[1] if len(sys.argv) > 1 else '670200106081534.mat'

    # Pipeline: load -> pick usable features -> build 1 Hz table -> write CSV.
    raw_mat_data_dict = open_mat_file(file_name)
    features_scrubbed = extract_features(raw_mat_data_dict)
    array_to_write = feature_vector_array(raw_mat_data_dict, features_scrubbed)
    write_csv_output(array_to_write, file_name)