# (c) 2018, SAS Institute Inc.

import sys
import os
import numpy as np
import pandas as pd
import logging

# Pending SAS7BDAT output as a (name, dir, DataFrame) tuple, written by
# irm_set_task_output_sas7b() and reset to () at the start of each task run, e.g.
# ('option_instrument_bi_result', '/local/install/pa/data/1053187354/mk_opt', DataFrame)
irm_task_output_sas7b_tuple = ()
# Column names of the pending output grouped by kind. The lists are cleared and
# refilled in place by irm_get_sas7_output_info() and read by irm_get_sas7_row().
num_col = []
dt_col = []
str_col = []


def init_logger(log_file_path, debug):
    """(Re)configure root logging to write to ``log_file_path``.

    Also binds the module-level ``logger`` used by the other functions in
    this file. The function may be called repeatedly (once per task run);
    each call redirects logging to the new file.

    Args:
        log_file_path: path of the log file. An existing file is replaced.
        debug: 'TRUE' (case-insensitive) selects DEBUG level; any other
            value selects INFO. Non-string values are converted with str().
    """
    global logger

    # logging.basicConfig() silently does nothing when the root logger already
    # has handlers, so a second call would keep writing to the first log file.
    # Detach and close the old handlers first (basicConfig(force=True) is only
    # available on Python 3.8+).
    root_logger = logging.getLogger()
    for handler in list(root_logger.handlers):
        root_logger.removeHandler(handler)
        handler.close()

    if os.path.isfile(log_file_path):
        os.remove(log_file_path)

    level = logging.DEBUG if str(debug).upper() == 'TRUE' else logging.INFO

    logging.basicConfig(level=level,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                        datefmt='%m-%d %H:%M',
                        filename=log_file_path,
                        filemode='w')

    logger = logging.getLogger(__name__)


def irm_set_task_output_sas7b(name, dir, df):
    """Remember (name, dir, df) as the pending SAS7BDAT output.

    The triple replaces whatever was stored before in the module-level
    ``irm_task_output_sas7b_tuple``.

    Output: retcode
    """
    global irm_task_output_sas7b_tuple
    pending_output = (name, dir, df)
    irm_task_output_sas7b_tuple = pending_output
    retcode = 0
    return (retcode,)


# irm_split_filepath('/local/fa.sample.3.6/landing_area/03312019/cashflows.sas7bdat') => ('/local/fa.sample.3.6/landing_area/03312019', 'cashflows', '.sas7bdat')
def irm_split_filepath(path):
    """Split ``path`` into directory, base name and extension, all lower-cased.

    The extension keeps its leading dot; it is '' when the file name has none.

    Output: dir, name, ext
    """
    parent, base = os.path.split(path)
    stem, suffix = os.path.splitext(base)
    return tuple(part.lower() for part in (parent, stem, suffix))


def irm_load_pytask_module(task_code_path):
    """Import the Python source file at ``task_code_path`` and return the module.

    The module name is the name part returned by irm_split_filepath() for the
    path. The loading mechanism is picked from the running interpreter
    version: importlib.util on 3.5+, SourceFileLoader on 3.0-3.4, imp on 2.x.

    Output: module
    """
    _, mod_name, _ = irm_split_filepath(task_code_path)
    major, minor = sys.version_info[0], sys.version_info[1]

    if major == 3 and minor >= 5:
        import importlib.util
        module_spec = importlib.util.spec_from_file_location(mod_name, task_code_path)
        loaded = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(loaded)
        return loaded

    if major == 3:
        # Python 3.0 - 3.4: importlib.util.module_from_spec is not available.
        from importlib.machinery import SourceFileLoader
        return SourceFileLoader(mod_name, task_code_path).load_module()

    if major == 2:
        import imp
        return imp.load_source(mod_name, task_code_path)

    raise Exception('Unsupported python version: {}'.format(sys.version))


def normalize_bytestr_columns(df):
    """Decode bytes values in object columns of ``df`` to str (UTF-8), in place.

    Only columns with numpy object dtype are touched; within them only values
    that are ``bytes`` instances are decoded, everything else (str, None, NaN,
    other objects) is left as is.

    Args:
        df: DataFrame to normalize. It is modified in place.

    Returns:
        The same DataFrame object, for convenience.

    Raises:
        UnicodeDecodeError: if a bytes value is not valid UTF-8.
    """
    logger.debug('Entering normalize_bytestr_columns(). df.head(): {}'.format(df.head()))

    def _decode(value):
        return value.decode('utf-8') if isinstance(value, bytes) else value

    for col in df.columns:
        # Comparing against ``object`` is safe for pandas extension dtypes,
        # whereas np.issubdtype() raises TypeError on them.
        if df[col].dtype != object:
            continue
        # Assign the converted column back explicitly: an inplace where() on
        # df[col] is chained assignment and does not reliably update df.
        df[col] = df[col].apply(_decode)

    logger.debug('Exiting normalize_bytestr_columns(). df.head(): {}'.format(df.head()))
    return df


def load_df_from_file(data_path):
    """Load the file at ``data_path`` into a DataFrame, choosing the reader by extension.

    Supported extensions (case-insensitive): .sas7bdat, .pkl, .csv, .xls,
    .xlsx, .h5, .html, .feather, .parquet, .msgpack, .dta, .json.

    Notes:
        * .sas7bdat: bytes values are decoded to str via normalize_bytestr_columns().
        * .h5: the key 'table' is tried first; if the file has no such key the
          file is read without a key, which works when it holds a single object
          (for example a file written by save_df_to_file() under key 'save').
        * .html: pd.read_html() returns a list of tables; the first one is used.

    Output: data_name, ext, df

    Raises:
        Exception: if the extension is not supported.
    """
    logger.debug('Loading DataFrame from {}.'.format(data_path))
    data_dir, data_name, ext = irm_split_filepath(data_path)
    if ext == '.sas7bdat':
        df = pd.read_sas(data_path)
        normalize_bytestr_columns(df)
    elif ext == '.pkl':
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        df = pd.read_pickle(data_path)
    elif ext == '.csv':
        df = pd.read_csv(data_path)
    elif ext in ('.xls', '.xlsx'):
        df = pd.read_excel(data_path)
    elif ext == '.h5':
        try:
            df = pd.read_hdf(data_path, 'table')
        except KeyError:
            # No object stored under 'table'; fall back to the single-object form.
            df = pd.read_hdf(data_path)
    elif ext == '.html':
        # read_html() returns a list of DataFrames (one per <table>) and raises
        # ValueError when no table is found, so index 0 always exists here.
        df = pd.read_html(data_path)[0]
    elif ext == '.feather':
        df = pd.read_feather(data_path)
    elif ext == '.parquet':
        df = pd.read_parquet(data_path)
    elif ext == '.msgpack':
        # NOTE(review): read_msgpack was removed in pandas 1.0; this branch only
        # works with older pandas versions.
        df = pd.read_msgpack(data_path)
    elif ext == '.dta':
        df = pd.read_stata(data_path)
    elif ext == '.json':
        df = pd.read_json(data_path)
    else:
        msg = 'Unsupported data extension {} in loading file {} to DataFrame.'.format(ext, data_path)
        logger.error(msg)
        raise Exception(msg)
    logger.debug('Done loading DataFrame from file {}. DataFrame: \n{} '.format(data_path, df.head()))
    return data_name, ext, df


#data_path: [partition:50:cashflows.sas7bdat]/local/install/Config/Lev1/AppData/SASIRM/pa/data/481195141/p_mk_cf/P_MK_CF.CASHFLOWS
def load_partition_df_from_file(data_path):
    """Load every partition of a partitioned data set into a dict of DataFrames.

    ``data_path`` has the form
    ``[partition:<num_partitions>:<file_name>]<partition_dir>``. Partition
    ``rank`` (1 .. num_partitions) is read from
    ``<partition_dir>/<rank>/<file_name>`` with load_df_from_file().

    Output: data_name, ext, df_dict
        data_name and ext are the name and extension that irm_split_filepath()
        derives from the full ``data_path`` string; df_dict maps str(rank) to
        the DataFrame of that partition.

    Raises:
        RuntimeError: if the partition prefix is missing or malformed.
    """
    logger.debug('Loading DataFrames of partition data from {}.'.format(data_path))

    if not data_path.startswith('[partition:'):
        msg = 'Partition prefix of [partition:<num_partitions>] is expected in path {}.'.format(data_path)
        logger.error(msg)
        raise RuntimeError(msg)

    close_idx = data_path.find(']')
    # Split at most twice so a file name containing ':' stays intact.
    fields = data_path[1:close_idx].split(':', 2) if close_idx != -1 else []
    try:
        if len(fields) != 3:
            raise ValueError('missing field')
        max_rank = int(fields[1])
    except ValueError:
        msg = 'Malformed partition prefix, [partition:<num_partitions>:<file_name>] is expected in path {}.'.format(data_path)
        logger.error(msg)
        raise RuntimeError(msg)

    file_name = fields[2]
    p_dir_path = data_path[close_idx + 1:]
    logger.debug('max_rank: {}, file_name: {}, p_dir_path: {}'.format(max_rank, file_name, p_dir_path))

    df_dict = {}
    for rank in range(1, max_rank + 1):
        p_file_path = os.path.join(p_dir_path, str(rank), file_name)
        logger.debug('rank: {}, p_file_path: {}'.format(rank, p_file_path))
        _, _, df = load_df_from_file(p_file_path)
        df_dict[str(rank)] = df

    data_dir, data_name, ext = irm_split_filepath(data_path)
    logger.debug('Done loading DataFrames of partition data from {}.'.format(data_path))
    return data_name, ext, df_dict


def save_df_to_file(df, data_path):
    """Write ``df`` to ``data_path``, choosing the writer by file extension.

    Supported extensions (case-insensitive): .pkl, .csv, .xls, .h5, .html,
    .feather, .parquet, .msgpack, .dta, .json.

    Output: retcode
        Always the 1-tuple (0,) on success.

    Raises:
        Exception: if the extension is not supported. Errors raised by the
            pandas writers propagate unchanged.
    """
    logger.debug('Saving DataFrame to file {}.'.format(data_path))
    ext = os.path.splitext(data_path)[1].lower()
    if ext == '.pkl':
        logger.debug('calling to_pickle ...')
        df.to_pickle(data_path)
    elif ext == '.csv':
        df.to_csv(data_path, index=False, mode='w')
    elif ext == '.xls':
        df.to_excel(data_path, sheet_name='Sheet1')
    elif ext == '.h5':
        # key is passed by keyword: it is keyword-only in recent pandas versions.
        df.to_hdf(data_path, key='save', mode='w', format='table')
    elif ext == '.html':
        df.to_html(data_path)
    elif ext == '.feather':
        df.to_feather(data_path)
    elif ext == '.parquet':
        # This branch must write the frame; it used to call pd.read_parquet().
        df.to_parquet(data_path)
    elif ext == '.msgpack':
        # NOTE(review): to_msgpack was removed in pandas 1.0; this branch only
        # works with older pandas versions.
        df.to_msgpack(data_path)
    elif ext == '.dta':
        df.to_stata(data_path)
    elif ext == '.json':
        df.to_json(data_path)
    else:
        msg = 'Unsupported data extension {} in saving DataFrame to file {}.'.format(ext, data_path)
        logger.error(msg)
        raise Exception(msg)
    logger.debug('Done saving DataFrame to file {}.'.format(data_path))
    return 0,


# input_list_str and output_list_str:
#     'STAGING.cashflows.sas7bdat=/local/fa.sample.3.5/landing_area/03312017/cashflows.sas7bdat,
#      STAGING.entity.sas7bdat=/local/fa.sample.3.5/landing_area/03312017/entity.sas7bdat'
def _irm_parse_name_value_list(list_str):
    """Parse 'name=value,name=value' into a list of (name, value) tuples.

    Empty items (and therefore a completely empty string) are skipped. Only
    the first '=' of an item separates name from value, so a value may itself
    contain '='.

    Raises:
        ValueError: if a non-empty item contains no '='.
    """
    pairs = []
    for item in list_str.split(','):
        if not item.strip():
            continue
        name, sep, value = item.partition('=')
        if not sep:
            raise ValueError("Expected '<name>=<value>' but got '{}'.".format(item))
        pairs.append((name, value))
    return pairs


def irm_exec_pytask(irm_utils_path, irm_module_path_list_str, task_code_path, log_file_path, input_list_str, output_list_str, task_property_list_str, debug):
    """Run the ``irm_run()`` entry point of a Python task module.

    Steps:
        1. Initialize logging to ``log_file_path``.
        2. Load the irm_utils module and the task module from their file paths
           and append the comma-separated module directories to ``sys.path``.
        3. Load every input ('<name>=<path>' items of ``input_list_str``) into a
           DataFrame (or a dict of DataFrames for '[partition:...' paths) and
           register every output name with the value None, all in one dict
           keyed by name. Task properties, if any, are added under the key
           '_irm_task_property_dataframe_'.
        4. Call ``task_mod.irm_run(dict)``; the task is expected to replace the
           None output entries with DataFrames.
        5. Store a .sas7bdat output in the module-level tuple for later
           row-by-row retrieval; write any other output with save_df_to_file().

    Args:
        irm_utils_path: path of the irm_utils.py source file.
        irm_module_path_list_str: comma-separated directories for ``sys.path``.
        task_code_path: path of the task source file defining ``irm_run``.
        log_file_path: path of the log file for this run.
        input_list_str: comma-separated '<name>=<path>' input items.
        output_list_str: comma-separated '<name>=<path>' output items.
        task_property_list_str: comma-separated '<name>=<value>' properties; may be ''.
        debug: 'TRUE' enables DEBUG logging (see init_logger()).

    Output: retcode, num_outputs_to_convert
        (1, 0) when irm_run() raised. Otherwise
        (0, len(irm_task_output_sas7b_tuple)), i.e. 0 when no SAS7BDAT output
        was stored and 3 (the arity of the stored tuple) when one was.
        NOTE(review): callers presumably only test the second value for
        non-zero - confirm before changing it to a real count.

    Raises:
        Exception: if preparation fails, if an output is still None after the
            task ran, or if saving an output file fails.
    """
    global irm_task_output_sas7b_tuple

    # initialize the logger first
    init_logger(log_file_path, debug)

    task_dir, task_filename = os.path.split(task_code_path)
    logger.debug('Entering irm_exec_pytask: task: {}, inputs: {}, outputs: {}.'.format(task_filename, input_list_str, output_list_str))

    try:
        logger.debug('Loading irm_utils module {}, python version {}'.format(irm_utils_path, sys.version))
        irm_utils_mod = irm_load_pytask_module(irm_utils_path)

        logger.debug('Preparing irm module paths.')
        for item in irm_module_path_list_str.split(','):
            # Skip blanks and paths already present so that repeated task runs
            # in one interpreter do not keep growing sys.path.
            if not item or item in sys.path:
                continue
            sys.path.append(item)
            logger.debug('added module path {}.'.format(item))
        logger.debug('Done preparing irm module paths.')

        logger.debug('Loading task module {}, python version {}'.format(task_filename, sys.version))
        task_mod = irm_load_pytask_module(task_code_path)

        logger.debug('Preparing input DataFrames for task {}.'.format(task_filename))
        input_output_df_dict = {}
        for three_part_name, data_path in _irm_parse_name_value_list(input_list_str):
            logger.debug('Converting input data to DataFrame, data: {}, path: {}.'.format(three_part_name, data_path))
            if data_path.startswith('[partition:'):
                _, _, df_dict = load_partition_df_from_file(data_path)
                input_output_df_dict[three_part_name] = df_dict
            else:
                _, _, df = load_df_from_file(data_path)
                input_output_df_dict[three_part_name] = df
        logger.debug('Done preparing input DataFrames for task {}'.format(task_filename))

        logger.debug('Preparing output_data_path_dict and output DataFrame dictionary.')
        output_data_path_dict = {}
        for three_part_name, data_path in _irm_parse_name_value_list(output_list_str):
            output_data_path_dict[three_part_name] = data_path
            logger.debug('output_data_path_dict[{}] = {}.'.format(three_part_name, data_path))
            # Placeholder the task must replace with the output DataFrame.
            input_output_df_dict[three_part_name] = None
            logger.debug('input_output_df_dict[{}] = {}.'.format(three_part_name, None))
        logger.debug('Done preparing output_data_path_dict and output DataFrame dictionary.')

        logger.debug('Preparing irm_utils.task_property_dict.')
        if len(task_property_list_str) > 0:
            try:
                task_property_df = irm_utils_mod.create_property_dataframe()
                for name, value in _irm_parse_name_value_list(task_property_list_str):
                    irm_utils_mod.add_task_property(task_property_df, name, value)
                    logger.debug('added task property {} = {} to task_property_df.'.format(name, value))
                input_output_df_dict['_irm_task_property_dataframe_'] = task_property_df
            except Exception as e:
                msg = 'Caught exception in adding task property. Exception: {}.'.format(e)
                logger.exception(msg)
                raise Exception(msg)
        logger.debug('Done preparing irm_utils.task_property_dict.')
    except Exception as e:
        msg = 'Caught exception in preparing task execution. Exception: {}.'.format(e)
        logger.exception(msg)
        raise Exception(msg)

    # clear the global sas7bdat output info left over from a previous run
    irm_task_output_sas7b_tuple = ()

    try:
        logger.info('--------------- {} --------------- '.format(task_filename))
        task_mod.irm_run(input_output_df_dict)
    except Exception as e:
        msg = 'Caught exception in calling irm_run(): {}.'.format(e)
        logger.exception(msg)
        logger.debug('Done calling irm_run() of task {}. Task failed.'.format(task_filename))
        return 1, 0
    finally:
        # Closing marker is written for both the success and the failure path.
        logger.info('--------------- {} --------------- '.format(task_filename))

    # store sas7bdat DataFrame to the global tuple; it is converted to sas7bdat later
    for three_part_name, df in input_output_df_dict.items():
        if three_part_name not in output_data_path_dict:
            continue
        if df is None:
            raise Exception('Unexpected None output DataFrame of {} after running task {}.'.
                            format(three_part_name, task_filename))
        output_path = output_data_path_dict[three_part_name]
        data_dir, name, ext = irm_split_filepath(output_path)
        logger.debug('Before saving output data_dir: {}, name: {}, ext: {}, '.format(data_dir, name, ext))
        if ext == '.sas7bdat':  # store to global tuple for later access
            if irm_task_output_sas7b_tuple:
                # Only one pending SAS7BDAT output can be held at a time.
                logger.warning('Replacing previously stored SAS7B output {} with {}.'.format(irm_task_output_sas7b_tuple[0], name))
            logger.debug('Storing SAS7B output {} ...'.format(three_part_name))
            irm_set_task_output_sas7b(name, data_dir, df)
            logger.debug('Done saving output to SAS dataset. name: {}, dir: {}, df:\n{}.'.format(name, data_dir, df.head()))
        else:  # otherwise; persist DataFrames of other types to files
            try:
                save_df_to_file(df, output_path)
            except Exception as e:
                logger.exception('Caught exception in saving output DataFrame to file {}.\n{}'.format(output_path, e))
                raise

    result = (0, len(irm_task_output_sas7b_tuple))
    logger.debug('Exiting irm_exec_pytask:  task: {}, inputs: {}, outputs: {}, task_properties: {}, return: {}.'.format(task_filename, input_list_str, output_list_str, task_property_list_str, result))
    return result


def irm_get_sas7_output_info():
    """Classify the columns of the pending SAS7BDAT output DataFrame.

    Reads the (name, dir, df) tuple stored in ``irm_task_output_sas7b_tuple``
    and refills the module-level lists ``num_col``, ``dt_col`` and ``str_col``
    in place with the names of the numeric, datetime and string/object
    columns. Columns of any other dtype are reported with an error log entry
    and left out of all three lists.

    Output: dim, num_col, dt_col, str_col
        dim is [row count, len(num_col), len(dt_col), len(str_col)]; the three
        lists are the module-level list objects themselves.

    Raises:
        RuntimeError: if no output tuple is stored or one of its members is None.
    """
    logger.debug('Entering irm_get_sas7_output_info()')

    if len(irm_task_output_sas7b_tuple) == 0:
        msg = 'Unexpected empty irm_task_output_sas7b_tuple data_name.'
        logger.error(msg)
        raise RuntimeError(msg)

    data_name, data_dir, df = irm_task_output_sas7b_tuple
    if data_name is None or data_dir is None or df is None:
        msg = 'Unexpected None value in  irm_task_output_sas7b_tuple data_name: {} data_dir: {} DataFrame: {}.'.format(data_name, data_dir, df)
        logger.error(msg)
        raise RuntimeError(msg)

    # Clear in place (list.clear() needs Python 3.3+) so that other holders of
    # these list objects see the new content.
    del num_col[:]
    del dt_col[:]
    del str_col[:]

    for col, dtype in zip(df.columns, df.dtypes):
        target = None
        if isinstance(dtype, np.dtype):
            if np.issubdtype(dtype, np.number):
                target = num_col
            elif np.issubdtype(dtype, np.datetime64):
                target = dt_col
            elif np.issubdtype(dtype, np.object_):
                target = str_col
        elif pd.api.types.is_string_dtype(dtype):
            # pandas string extension dtype; np.issubdtype() would raise
            # TypeError on any extension dtype.
            target = str_col

        if target is None:  # unsupported variable data type in conversion to SAS7BDAT
            logger.error('Error: unsupported type in conversion to SAS7BDAT. column: {}, dtype: {}'.format(col, dtype))
        else:
            target.append(col)

    dim = [df.shape[0], len(num_col), len(dt_col), len(str_col)]
    logger.debug('Exiting irm_get_sas7_output_info(). dim={}\nnum_col={}\ndt_col={}\nstr_col={}'.format(dim, num_col, dt_col, str_col))
    return dim, num_col, dt_col, str_col


def _irm_to_sas_text(value):
    """Return ``value`` of a string/object column as text.

    str values are returned unchanged, values with a ``decode`` method (bytes)
    are decoded, missing values (None/NaN/NaT/NA) become '' and anything else
    is converted with str().
    """
    if isinstance(value, str):
        return value
    if hasattr(value, 'decode'):
        return value.decode()
    try:
        is_missing = bool(pd.isnull(value))
    except (TypeError, ValueError):
        # Container-like values: isnull() yields an array with no single truth value.
        is_missing = False
    # NOTE(review): '' is assumed to be the right representation of a missing
    # character value for the consumer - confirm.
    return '' if is_missing else str(value)


def irm_get_sas7_row(index):
    """Return the values of one row of the pending SAS7BDAT output DataFrame.

    The row is selected by position (0-based), independent of the DataFrame's
    index labels, so frames that were filtered or re-indexed by the task are
    read correctly. Columns are taken from the module-level ``num_col``,
    ``dt_col`` and ``str_col`` lists filled by irm_get_sas7_output_info().

    Args:
        index: 0-based row position.

    Output: num_val, dt_val, str_val
        num_val: Python scalars of the numeric columns.
        dt_val: values of the datetime columns converted with to_pydatetime().
        str_val: text of the string/object columns (see _irm_to_sas_text()).
    """
    data_name, data_dir, df = irm_task_output_sas7b_tuple
    num_val = [df[col].iloc[index].item() for col in num_col]
    dt_val = [df[col].iloc[index].to_pydatetime() for col in dt_col]
    str_val = [_irm_to_sas_text(df[col].iloc[index]) for col in str_col]
    return num_val, dt_val, str_val,

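# Example test run (sample values; adjust the paths to the local installation).
# input_list_str and output_list_str are comma-separated '<name>=<path>' items:
# irm_utils_path = '/local/install/Config/Lev1/AppData/SASIRM/fa.0.3.6/source/python/packages/irm_utils.py'
# irm_module_path_list_str = '/home/local/install/Config/Lev1/AppData/SASIRM/fa.0.3.6/source/python/nodes'
# task_code_path = '/local/fa.0.3.6/source/python/nodes/analysis/discount_single_cashflow.py'
# log_file_path = '/local/pa/data/2088885823/pythontask_1155804634.log'
# input_list_str = 'IN_INST_DISCOUNT=/local/fa.sample.3.6/landing_area/03312019/cashflows.sas7bdat'
# output_list_str = 'OUT_INST_DISCOUNT=/local/pa/data/2088885823/mk_cf/cashflows.sas7bdat'
# task_property_list_str = ''
# debug = 'TRUE'
# irm_exec_pytask(irm_utils_path, irm_module_path_list_str, task_code_path, log_file_path, input_list_str, output_list_str, task_property_list_str, debug)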
