#
# Copyright (c) 2019 by SAS Institute Inc., Cary, NC, USA.
#
from __future__ import absolute_import, division, print_function, unicode_literals
import sys
import json
import re

# Alias for the interpreter's unicode text type: ``unicode`` on Python 2, ``str`` otherwise.
# The conditional expression only evaluates the name ``unicode`` when running on Python 2.
text = unicode if sys.version_info[0] == 2 else str

# Names that must not be used as generated variable names because they are Python
# reserved keywords. 'async' and 'await' are included because they are reserved
# keywords from Python 3.7 onwards.
PYTHON_RESERVED_KEYWORD = ('False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await', 'break', 'class',
                           'continue', 'def', 'del', 'elif', 'else', 'except', 'finally', 'for', 'from', 'global',
                           'if', 'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass', 'raise', 'return',
                           'try', 'while', 'with', 'yield')


def lower_case_dict_key(collection):
    """
    Build a copy of a dict whose keys are lower-cased and stripped of surrounding whitespace.

    Keys that become identical after normalization collapse into a single entry; the
    value of the key iterated last is the one kept.

    Args:
        collection: dict whose keys support ``lower()`` (i.e. strings)

    Returns:
        new dict with normalized keys and the original values

    Raises:
        ValueError: if collection is not a dict
    """
    if not isinstance(collection, dict):
        # only dict is handled here, so the message must not advertise list support
        raise ValueError("Illegal argument. Only support dict!")
    return {key.lower().strip(): value for key, value in collection.items()}


def is_valid_varaible_name(variable_name, variable_type='variable name'):
    """
    Validate a name against the naming rule used for generated variables.

    A name is accepted when it is not a Python reserved keyword and consists of a
    lower case letter or underscore followed by lower case letters, digits or
    underscores only.

    Args:
        variable_name: candidate name to validate
        variable_type: label describing the kind of name; only used in the message text

    Returns:
        tuple ``(is_valid, message, suggested_name)``. ``suggested_name`` equals
        ``variable_name`` when the name is valid, otherwise it is the result of
        ``get_suggested_variable_name``.
    """
    if variable_name in PYTHON_RESERVED_KEYWORD:
        suggested_name = get_suggested_variable_name(variable_name)
        return False, 'Illegal ' + variable_type + ' "' + variable_name + '".\n' +\
                      'The ' + variable_type + ' should not be Python reserved keyword.\n' +\
                      'Rename it as "' + suggested_name + '"', suggested_name

    # Anchor with \Z rather than $: "$" also matches just before a trailing newline,
    # which would let a name such as "abc\n" pass validation.
    if re.match(r'^[a-z_][a-z_0-9]*\Z', variable_name):
        return True, 'The ' + variable_type + ' "' + variable_name + '" passed naming validation.', variable_name

    suggested_name = get_suggested_variable_name(variable_name)
    return False, 'Illegal ' + variable_type + ' "' + variable_name + \
                  '".\nThe ' + variable_type + ' should start with lower case alphabet or underscore ' + \
                  'and the remaining can be numeric, lower case alphabet or underscore.\n' +\
                  'Rename it as "' + suggested_name + '"', suggested_name


def get_suggested_variable_name(varaible_name):
    """
    Derive a legal variable name from an arbitrary name.

    Steps, in order: prefix reserved keywords with an underscore, lower-case the
    name if it contains upper case characters, replace every character outside
    ``[a-z_0-9]`` with an underscore, replace a leading digit with an underscore,
    and finally prefix an underscore if the result is empty or is itself a
    reserved keyword.

    Args:
        varaible_name: original name

    Returns:
        suggested name made of lower case letters, digits and underscores, not
        starting with a digit and not equal to a Python reserved keyword
    """
    new_var_name = varaible_name
    # add underscore for python keyword
    if varaible_name in PYTHON_RESERVED_KEYWORD:
        new_var_name = '_' + new_var_name
    # lower case variable name
    if any(map(text.isupper, text(new_var_name))):
        new_var_name = new_var_name.lower()
    # replace illegal character with underscore
    new_var_name = re.sub(r'[^a-z_0-9]', '_', new_var_name)

    # if start with numeric, replace it with underscore
    new_var_name = re.sub(r'^[0-9]', '_', new_var_name)

    # Normalization can itself produce an unusable name: lower-casing "For" yields
    # the keyword "for", and an empty input stays empty. Prefix an underscore so the
    # suggestion is always a usable name.
    if not new_var_name or new_var_name in PYTHON_RESERVED_KEYWORD:
        new_var_name = '_' + new_var_name
    return new_var_name


class JobFlow:
    """ Job flow container

    Holds tasks, nested sub job flows, job flow references and the visual
    input/output table arrays, and turns the whole recursive structure into a
    dictionary or a JSON string.

    """
    DICT_KEY_JOBFLOW_NAME = "jobflow_name"
    DICT_KEY_CATEGORY_ID = "category_id"
    DICT_KEY_TASK_ARRAY = "jobflow_task_array"
    DICT_KEY_SUB_JOBFLOW_ARRAY = "subjobflow_array"
    DICT_KEY_JOBFLOW_REFERENCE_ARRAY = "jobflow_reference_array"
    DICT_KEY_VISUAL_INPUT_TABLE_ARRAY = "visual_input_table_array"
    DICT_KEY_VISUAL_OUTPUT_TABLE_ARRAY = "visual_output_table_array"

    def __init__(self, jobflow_name, category_id):
        """
        Create an empty job flow
        Args:
            jobflow_name: job flow name, must be text
            category_id: category ID, must be text

        """
        assert isinstance(jobflow_name, text)
        assert isinstance(category_id, text)
        self.jobflow_name = jobflow_name
        self.task_list = []
        self.sub_jobflow_list = []
        self.jobflow_reference_list = []
        self.visual_input_table_array = []
        self.visual_output_table_array = []
        # the attribute is spelled "categroy_id"; build_jobflow_dict reads it under that name
        self.categroy_id = category_id

    def add_task(self, task):
        """
        Append a task to this job flow
        Args:
            task: Task instance

        Returns:
            this job flow, to allow call chaining

        """
        assert isinstance(task, Task)
        self.task_list.append(task)
        return self

    def add_sub_jobflow(self, sub_jobflow):
        """
        Append a nested job flow to this job flow
        Args:
            sub_jobflow: JobFlow instance

        Returns:
            this job flow, to allow call chaining

        """
        assert isinstance(sub_jobflow, JobFlow)
        self.sub_jobflow_list.append(sub_jobflow)
        return self

    def add_sub_jobflow_by_reference(self, referenced_jobflow_name, referenced_category_id, is_in_subflow_folder=True):
        """
        Append a reference to another job flow
        Args:
            referenced_jobflow_name: name of the referenced job flow
            referenced_category_id: category ID of the referenced job flow
            is_in_subflow_folder: flag stored on the reference, True by default

        Returns:
            this job flow, to allow call chaining
        """
        self.jobflow_reference_list.append(
            JobFlowReference(referenced_jobflow_name, referenced_category_id, is_in_subflow_folder))
        return self

    def add_visual_input_table_array(self, visual_input_table_array):
        """
        Set the input table array used for visualization (replaces the current array)
        Args:
            visual_input_table_array: list of input tables for visualization purpose

        Returns:
            this job flow, to allow call chaining

        """
        assert isinstance(visual_input_table_array, list)
        self.visual_input_table_array = visual_input_table_array
        return self

    def add_visual_output_table_array(self, visual_output_table_array):
        """
        Set the output table array used for visualization (replaces the current array)
        Args:
            visual_output_table_array: list of output tables for visualization purpose

        Returns:
            this job flow, to allow call chaining

        """
        assert isinstance(visual_output_table_array, list)
        self.visual_output_table_array = visual_output_table_array
        return self

    def get_json_stream(self):
        """
        Serialize the job flow to JSON
        Returns:
            JSON string of build_jobflow_dict(), indented by 4 with sorted keys

        """
        flow_as_dict = self.build_jobflow_dict()
        return json.dumps(flow_as_dict, indent=4, sort_keys=True)

    def build_jobflow_dict(self):
        """
        Convert the job flow, including nested job flows, into a dictionary
        Returns:
            job flow in form of Python dictionary

        """
        # nested job flows are converted recursively
        nested_flows = []
        for child_flow in self.sub_jobflow_list:
            assert isinstance(child_flow, JobFlow)
            nested_flows.append(child_flow.build_jobflow_dict())

        # job flow references
        references = []
        for reference in self.jobflow_reference_list:
            assert isinstance(reference, JobFlowReference)
            references.append(reference.build_jobflow_reference_dict())

        # tasks
        tasks = []
        for member_task in self.task_list:
            assert isinstance(member_task, Task)
            tasks.append(member_task.build_task_dict())

        return {
            JobFlow.DICT_KEY_JOBFLOW_NAME: self.jobflow_name,
            JobFlow.DICT_KEY_CATEGORY_ID: self.categroy_id,
            JobFlow.DICT_KEY_SUB_JOBFLOW_ARRAY: nested_flows,
            JobFlow.DICT_KEY_JOBFLOW_REFERENCE_ARRAY: references,
            JobFlow.DICT_KEY_TASK_ARRAY: tasks,
            JobFlow.DICT_KEY_VISUAL_INPUT_TABLE_ARRAY: self.visual_input_table_array,
            JobFlow.DICT_KEY_VISUAL_OUTPUT_TABLE_ARRAY: self.visual_output_table_array,
        }


class Task:
    """ Job flow task class

    Stores the input and output table lists of a task, checks that every
    substitution token (table entry starting with "%") has a value, and builds
    the dictionary form of the task.

    """
    TASK_EXECUTION_TYPE_COMPUTATIONAL = "computational"
    TASK_EXECUTION_TYPE_RECOMBINE = "recombine"
    TASK_EXECUTION_TYPE_PARTITION = "partition"

    DICT_KEY_TASK_NAME = "task_name"
    DICT_KEY_TASK_SOURCE_TYPE = "task_source_type"
    DICT_KEY_TASK_EXECUTION_TYPE = "task_execution_type"
    DICT_KEY_TASK_FILE = "task_source_file"
    DICT_KEY_CARD_TABLE = "cardinality_table"
    DICT_KEY_SUB_VALUE_ARRAY = "table_substitution_value_array"
    DICT_KEY_SUB_TOKEN_ARRAY = "table_substitution_token_array"
    DICT_KEY_INPUT_TABLE_ARRAY = "input_table_array"
    DICT_KEY_OUTPUT_TABLE_ARRAY = "output_table_array"

    def __init__(self, task_name, task_source_type, task_source_file,
                 input_list=None,
                 output_list=None,
                 substitution_dict=None,
                 default_substitution_dict=None):
        """
        Create a task with the "computational" execution type
        Args:
            task_name: task name
            task_source_type: task source type. Available value: "java", "sas"
            task_source_file: task source file. If source is SAS, it is SAS file name. If source is java, it is
                                class name in full naming space.
            input_list: list of input table/collection/substitution
            output_list: list of output table/collection/substitution
            substitution_dict: dictionary of substitution from scripting client
            default_substitution_dict: dictionary of default substitution defined in source file

        """
        # omitted arguments become fresh empty containers
        input_list = list() if input_list is None else input_list
        output_list = list() if output_list is None else output_list
        substitution_dict = dict() if substitution_dict is None else substitution_dict
        default_substitution_dict = dict() if default_substitution_dict is None else default_substitution_dict

        self.task_name = text()
        self.cardinality_table = text()
        self.set_task_name(task_name)
        self.task_source_type = task_source_type
        self.task_source_file = task_source_file
        # a task is computational until partition() or recombine() is called
        self.task_execution_type = Task.TASK_EXECUTION_TYPE_COMPUTATIONAL

        assert isinstance(input_list, list)
        self.input_list = input_list
        assert isinstance(output_list, list)
        self.output_list = output_list
        assert isinstance(substitution_dict, dict)
        assert isinstance(default_substitution_dict, dict)

        # keys of both dictionaries are normalized before merging
        client_tokens = lower_case_dict_key(substitution_dict)
        default_tokens = lower_case_dict_key(default_substitution_dict)

        # defaults first, then client values override or extend them
        merged_tokens = dict(default_tokens)
        merged_tokens.update(client_tokens)

        # keep only the tokens actually used by the input and output tables
        input_tokens = self._validate_table_substitution("inputs", self.input_list, merged_tokens)
        output_tokens = self._validate_table_substitution("outputs", self.output_list, merged_tokens)
        combined_tokens = dict(input_tokens)
        combined_tokens.update(output_tokens)
        self.substitution_dict = combined_tokens

    def set_task_name(self, task_name):
        """
        Set task name. The name is stored exactly as given.
        Args:
            task_name: task name

        Returns:
            task itself
        """
        self.task_name = task_name
        return self

    def _validate_table_substitution(self, in_or_out, table_list, sub_dict):
        """
        Check that every substitution token in table_list has a value in sub_dict.
        A token is a table_list entry that starts with "%".

        Args:
            in_or_out: string value, either "inputs" or "outputs"; only used in assertion messages
            table_list: list of table, which may contain substitution table
            sub_dict: dictionary with substitution token as key and table as value

        Returns:
           dictionary restricted to the tokens found in table_list
        """
        tokens_in_use = [entry for entry in table_list if entry.startswith("%")]

        if not sub_dict:
            # without any substitution values, no token may be used at all
            assert len(tokens_in_use) == 0, \
                "For task %s, no %s table value is provided for substitution token - %s" \
                % (self.task_name, in_or_out, tokens_in_use)
            return dict()

        resolved = dict()
        for token in tokens_in_use:
            # a missing or falsy value counts as "not provided"
            assert sub_dict.get(token, False), \
                "For task %s, %s substitution token table (%s) is not provided value from substitution_dict." \
                % (self.task_name, in_or_out, token)
            resolved[token] = sub_dict.get(token)
        return resolved

    def build_task_dict(self):
        """
        Convert the task into a dictionary
        Returns:
            task in form of Python dictionary
        """
        task_as_dict = {
            Task.DICT_KEY_TASK_NAME: self.task_name,
            Task.DICT_KEY_TASK_FILE: self.task_source_file,
            Task.DICT_KEY_TASK_SOURCE_TYPE: self.task_source_type,
            Task.DICT_KEY_TASK_EXECUTION_TYPE: self.task_execution_type,
            Task.DICT_KEY_INPUT_TABLE_ARRAY: self.input_list,
            Task.DICT_KEY_OUTPUT_TABLE_ARRAY: self.output_list,
        }
        # the cardinality table is only emitted when one has been set
        if len(self.cardinality_table) > 0:
            task_as_dict[Task.DICT_KEY_CARD_TABLE] = self.cardinality_table

        # tokens and values are parallel arrays: values follow the order of the tokens
        tokens = list(self.substitution_dict)
        values = [self.substitution_dict[token] for token in tokens]
        task_as_dict[Task.DICT_KEY_SUB_TOKEN_ARRAY] = tokens
        task_as_dict[Task.DICT_KEY_SUB_VALUE_ARRAY] = values

        return task_as_dict

    def partition(self, cardinality_table):
        """
        Mark the task as a partition task and store the lower-cased cardinality table name
        Args:
            cardinality_table: cardinality table name

        Returns:
            task itself
        """
        self.task_execution_type = Task.TASK_EXECUTION_TYPE_PARTITION
        assert isinstance(cardinality_table, text)
        lowered_table_name = cardinality_table.lower()
        self.cardinality_table = lowered_table_name
        return self

    def recombine(self):
        """
        Mark the task as a recombine task
        Returns:
            task itself
        """
        self.task_execution_type = Task.TASK_EXECUTION_TYPE_RECOMBINE
        return self


class JobFlowReference:
    """ Reference to a job flow

    Identifies a job flow by name and category ID and records the
    is_in_subflow_folder flag.

    """
    DICT_KEY_JOBFLOW_NAME = "jobflow_name"
    DICT_KEY_CATEGORY_ID = "category_id"
    DICT_KEY_IS_IN_SUBFLOW_FOLDER = "is_in_subflow_folder"

    def __init__(self, jobflow_name, category_id, is_in_subflow_folder=True):
        """
        Create a job flow reference
        Args:
            jobflow_name: name of the referenced job flow, must be text
            category_id: category ID of the referenced job flow, must be text
            is_in_subflow_folder: must be exactly a bool, True by default

        """
        assert isinstance(jobflow_name, text)
        assert isinstance(category_id, text)
        assert type(is_in_subflow_folder) is bool
        self.jobflow_name = jobflow_name
        # the attribute is spelled "categroy_id"; build_jobflow_reference_dict reads it under that name
        self.categroy_id = category_id
        self.is_in_subflow_folder = is_in_subflow_folder

    def build_jobflow_reference_dict(self):
        """
        Convert the reference into a dictionary
        Returns:
            reference in form of Python dictionary
        """
        return {
            JobFlowReference.DICT_KEY_JOBFLOW_NAME: self.jobflow_name,
            JobFlowReference.DICT_KEY_CATEGORY_ID: self.categroy_id,
            JobFlowReference.DICT_KEY_IS_IN_SUBFLOW_FOLDER: self.is_in_subflow_folder,
        }
