"""
Handles GitHub Actions-style parameter matrices
"""
import ubelt as ub


def handle_yaml_grid(default, auto, arg):
    """
    Coerce a grid specification to standard form and expand it.

    The standard form is a dictionary whose only keys are ``matrix`` (a
    mapping from parameter names to lists of candidate values) and / or
    ``include`` (a list of explicit configurations). The returned grid is
    the named product of ``matrix`` followed by the ``include`` items.

    Args:
        default (Dict): matrix that is used when ``arg`` is falsy.

        auto (Dict | List | str): specification substituted for ``arg`` when
            ``arg`` is ``True`` or the string ``'auto'``. A string is parsed
            as YAML.

        arg (bool | str | Dict | List | None): the requested grid. A string
            (other than ``'auto'``) is parsed as YAML. A dictionary that is
            not already in standard form is interpreted as the matrix
            itself, and a list is interpreted as the ``include`` items.

    Returns:
        List[Dict]: the expanded grid items.

    Raises:
        TypeError: if the specification resolves to something other than a
            dictionary or a list.

    Example:
        >>> default = {}
        >>> auto = {}
        >>> arg = ub.codeblock(
        >>>     '''
        >>>     matrix:
        >>>         foo: ['bar', 'baz']
        >>>     include:
        >>>         - {'foo': 'buz', 'bug': 'boop'}
        >>>     ''')
        >>> handle_yaml_grid(default, auto, arg)

        >>> default = {'baz': [1, 2, 3]}
        >>> arg = '''
        >>>     include:
        >>>     - {
        >>>       "thresh": 0.1,
        >>>       "morph_kernel": 3,
        >>>       "norm_ord": 1,
        >>>       "agg_fn": "probs",
        >>>       "thresh_hysteresis": "None",
        >>>       "moving_window_size": "None",
        >>>       "polygon_fn": "heatmaps_to_polys"
        >>>     }
        >>>     '''
        >>> handle_yaml_grid(default, auto, arg)
    """
    import ruamel.yaml
    stdform_keys = {'matrix', 'include'}
    print('arg = {}'.format(ub.urepr(arg, nl=1)))
    if arg:
        if arg is True:
            arg = 'auto'
        if isinstance(arg, str) and arg == 'auto':
            arg = auto
        if isinstance(arg, str):
            # The module-level ``ruamel.yaml.safe_load`` helper was removed in
            # ruamel.yaml 0.18. The YAML class API is available in both old
            # and new releases.
            arg = ruamel.yaml.YAML(typ='safe', pure=True).load(arg)
    else:
        arg = {'matrix': default}

    if isinstance(arg, dict):
        arg = ub.udict(arg)
        has_only_std_keys = len(arg - stdform_keys) == 0
        if not (has_only_std_keys and (arg & stdform_keys)):
            # Not in standard form: the dictionary itself is the matrix.
            arg = {'matrix': arg}
    elif isinstance(arg, list):
        # Transform list form to standard form
        arg = {'include': arg}
    else:
        raise TypeError(type(arg))

    # Internal invariant: every branch above produces standard form.
    assert set(arg.keys()).issubset(stdform_keys)
    print('arg = {}'.format(ub.urepr(arg, nl=1)))

    basis = arg.get('matrix', {})
    grid = list(ub.named_product(basis)) if basis else []
    grid.extend(arg.get('include', []))
    return grid


def coerce_list_of_action_matrices(arg):
    """
    Preprocess the parameter grid input into a standard form

    Args:
        arg (str | Dict | List[Dict]): YAML text, a single action matrix, or
            a list of action matrices. A dictionary that has neither a
            ``matrix`` nor an ``include`` key is interpreted as the matrix
            itself and is wrapped as ``{'matrix': <dict>}``.

    Returns:
        List[Dict]: a new list of action matrix dictionaries. Input that
            resolves to neither a list nor a dictionary gives an empty list.
    """
    if isinstance(arg, str):
        # Imported lazily so non-string inputs do not require the yaml
        # backend. ``ruamel.yaml.safe_load`` was removed in ruamel.yaml 0.18,
        # so the YAML class API is used instead.
        import ruamel.yaml
        data = ruamel.yaml.YAML(typ='safe', pure=True).load(arg)
    else:
        # Shallow copy so the caller's container is not returned directly.
        data = arg.copy()

    if isinstance(data, list):
        return list(data)
    if isinstance(data, dict):
        if not (data.keys() & {'matrix', 'include'}):
            data = {'matrix': data}
        return [data]
    return []


def prevalidate_param_grid(arg):
    """
    Scan a parameter grid for inputs that may cause problems later.

    For every action matrix, each value listed under one of the known
    source-path keys of its ``matrix`` is checked for existence on disk.
    Suspicious entries are printed; nothing is raised for them and nothing
    is returned.

    Args:
        arg (str | Dict | List[Dict]): anything accepted by
            :func:`coerce_list_of_action_matrices`.
    """
    # TODO: this doesn't belong in a utils folder.
    src_pathlike_keys = [
        'trk.pxl.model',
        'trk.pxl.data.test_dataset',
        'crop.src',
        'act.pxl.model',
        'act.pxl.data.test_dataset',
    ]

    logs = []

    def path_exists(candidate):
        # Expand the path before checking that it exists.
        return ub.Path(candidate).expand().exists()

    def log_issue(k, p, msg):
        # The parameter must stay named ``p``: the ``{p=}`` specifier prints
        # the variable name as part of the message.
        logs.append((k, p, msg))
        print(f'Key {k} with {p=} {msg}')

    for action_matrix in coerce_list_of_action_matrices(arg):
        matrix = action_matrix.get('matrix', {})
        for key in src_pathlike_keys:
            if key not in matrix:
                continue
            candidates = matrix[key]
            if not ub.iterable(candidates):
                # A scalar entry is treated as a list with a single item.
                candidates = [candidates]
            for candidate in candidates:
                if not path_exists(candidate):
                    log_issue(key, candidate, 'might not be a valid path')


def expand_param_grid(arg, max_configs=None):
    """
    Our own method for specifying many combinations. Uses the github actions
    method under the hood with our own extensions (see
    :func:`extended_github_action_matrix`).

    Args:
        arg (str | Dict | List[Dict]): YAML text, a single action matrix, or
            a list of action matrices. See
            :func:`coerce_list_of_action_matrices`.

        max_configs (int | None): maximum number of grid items to yield.
            If None, all items are yielded. A value less than or equal to
            zero yields nothing.

    Yields:
        Dict: a single entry in the grid.

    Ignore:
        >>> from geowatch.utils.util_param_grid import *  # NOQA
        >>> arg = ub.codeblock(
            '''
            - matrix:
                trk.pxl.model: [trk_a, trk_b]
                trk.pxl.data.tta_time: [0, 4]
                trk.pxl.data.set_cover_algo: [None, approx]
                trk.pxl.data.test_dataset: [D4_S2_L8]

                act.pxl.model: [act_a, act_b]
                act.pxl.data.test_dataset: [D4_WV_PD, D4_WV]
                act.pxl.data.input_space_scale: [1GSD, 4GSD]

                trk.poly.thresh: [0.17]
                act.poly.thresh: [0.13]

                exclude:
                  #
                  # The BAS A should not run with tta
                  - trk.pxl.model: trk_a
                    trk.pxl.data.tta_time: 4
                  # The BAS B should not run without tta
                  - trk.pxl.model: trk_b
                    trk.pxl.data.tta_time: 0
                  #
                  # The SC B should not run on the PD dataset when GSD is 1
                  - act.pxl.model: act_b
                    act.pxl.data.test_dataset: D4_WV_PD
                    act.pxl.data.input_space_scale: 1GSD
                  # The SC A should not run on the WV dataset when GSD is 4
                  - act.pxl.model: act_a
                    act.pxl.data.test_dataset: D4_WV
                    act.pxl.data.input_space_scale: 4GSD
                  #
                  # The The BAS A and SC B model should not run together
                  - trk.pxl.model: trk_a
                    act.pxl.model: act_b
                  # Other misc exclusions to make the output cleaner
                  - trk.pxl.model: trk_b
                    act.pxl.data.input_space_scale: 4GSD
                  - trk.pxl.data.set_cover_algo: None
                    act.pxl.data.input_space_scale: 1GSD

                include:
                  # only try the 10GSD scale for trk model A
                  - trk.pxl.model: trk_a
                    trk.pxl.data.input_space_scale: 10GSD
            ''')
        >>> grid_items = list(expand_param_grid(arg))
        >>> print('grid_items = {}'.format(ub.urepr(grid_items, nl=1, sort=0)))
        >>> from geowatch.utils.util_dotdict import dotdict_to_nested
        >>> print(ub.urepr([dotdict_to_nested(p) for p in grid_items], nl=-3, sort=0))
        >>> print(len(grid_items))
    """
    prevalidate_param_grid(arg)
    if max_configs is not None and max_configs <= 0:
        # The limit below is only checked after a yield, so a non-positive
        # limit has to be handled up front or one item would slip through.
        return
    action_matrices = coerce_list_of_action_matrices(arg)
    num_yielded = 0
    for item in action_matrices:
        for grid_item in extended_github_action_matrix(item):
            yield grid_item
            num_yielded += 1
            if max_configs is not None and num_yielded >= max_configs:
                return


def github_action_matrix(arg):
    """
    Implements the github action matrix strategy exactly as described.

    Unless I've implemented something incorrectly, I believe this method is
    limited and have extended it in :func:`extended_github_action_matrix`.

    Args:
        arg (Dict | str): a dictionary or a yaml file that resolves to a
            dictionary containing the keys "matrix", which maps parameters to a
            list of possible values. For convenience if a single scalar value
            is detected it is converted to a list of 1 item. The matrix may
            also include an "include" and "exclude" item, which are lists of
            dictionaries that modify existing / add new matrix configurations
            or remove them. The "include" and "exclude" parameter can also be
            specified at the same level of "matrix" for convenience.

    Yields:
        item : a single entry in the grid.

    References:
        https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs#expanding-or-adding-matrix-configurations

    CommandLine:
        xdoctest -m geowatch.utils.util_param_grid github_action_matrix:2

    Example:
        >>> from geowatch.utils.util_param_grid import *  # NOQA
        >>> arg = ub.codeblock(
                 '''
                   matrix:
                     fruit: [apple, pear]
                     animal: [cat, dog]
                     include:
                       - color: green
                       - color: pink
                         animal: cat
                       - fruit: apple
                         shape: circle
                       - fruit: banana
                       - fruit: banana
                         animal: cat
                 ''')
        >>> grid_items = list(github_action_matrix(arg))
        >>> print('grid_items = {}'.format(ub.urepr(grid_items, nl=1)))
        grid_items = [
            {'fruit': 'apple', 'animal': 'cat', 'color': 'pink', 'shape': 'circle'},
            {'fruit': 'apple', 'animal': 'dog', 'color': 'green', 'shape': 'circle'},
            {'fruit': 'pear', 'animal': 'cat', 'color': 'pink'},
            {'fruit': 'pear', 'animal': 'dog', 'color': 'green'},
            {'fruit': 'banana'},
            {'fruit': 'banana', 'animal': 'cat'},
        ]

    Example:
        >>> from geowatch.utils.util_param_grid import *  # NOQA
        >>> arg = ub.codeblock(
                '''
                  matrix:
                    os: [macos-latest, windows-latest]
                    version: [12, 14, 16]
                    environment: [staging, production]
                    exclude:
                      - os: macos-latest
                        version: 12
                        environment: production
                      - os: windows-latest
                        version: 16
            ''')
        >>> grid_items = list(github_action_matrix(arg))
        >>> print('grid_items = {}'.format(ub.urepr(grid_items, nl=1)))
        grid_items = [
            {'os': 'macos-latest', 'version': 12, 'environment': 'staging'},
            {'os': 'macos-latest', 'version': 14, 'environment': 'staging'},
            {'os': 'macos-latest', 'version': 14, 'environment': 'production'},
            {'os': 'macos-latest', 'version': 16, 'environment': 'staging'},
            {'os': 'macos-latest', 'version': 16, 'environment': 'production'},
            {'os': 'windows-latest', 'version': 12, 'environment': 'staging'},
            {'os': 'windows-latest', 'version': 12, 'environment': 'production'},
            {'os': 'windows-latest', 'version': 14, 'environment': 'staging'},
            {'os': 'windows-latest', 'version': 14, 'environment': 'production'},
        ]

    Example:
        >>> from geowatch.utils.util_param_grid import *  # NOQA
        >>> arg = ub.codeblock(
                 '''
                 matrix:
                   old_variable:
                       - null
                       - auto
                 include:
                     - old_variable: null
                       new_variable: 1
                     - old_variable: null
                       new_variable: 2
                 ''')
        >>> grid_items = list(github_action_matrix(arg))
        >>> print('grid_items = {}'.format(ub.urepr(grid_items, nl=1)))
    """
    import ruamel.yaml
    if isinstance(arg, str):
        # The module-level ``ruamel.yaml.safe_load`` helper was removed in
        # ruamel.yaml 0.18. The YAML class API is available in both old and
        # new releases.
        data = ruamel.yaml.YAML(typ='safe', pure=True).load(arg)
    else:
        data = arg.copy()

    # Work on copies so the pops below do not modify the caller's dicts.
    matrix = data.pop('matrix', {}).copy()

    # "include" / "exclude" may be given inside the matrix or next to it.
    # An entry inside the matrix takes precedence.
    include = matrix.pop('include', data.pop('include', []))
    exclude = matrix.pop('exclude', data.pop('exclude', []))
    include = list(map(ub.udict, include))
    exclude = list(map(ub.udict, exclude))

    # Scalars are promoted to a list with a single item.
    matrix_ = {k: (v if ub.iterable(v) else [v])
               for k, v in matrix.items()}

    orig_keys = set(matrix.keys())
    # Number of matrix combinations each include item was applied to. Items
    # that were never applied are emitted as standalone combinations.
    include_idx_to_nvariants = {idx: 0 for idx in range(len(include))}

    def include_modifiers(mat_item):
        """
        For each object in the include list, the key:value pairs in the object
        will be added to each of the matrix combinations if none of the
        key:value pairs overwrite any of the original matrix values. If the
        object cannot be added to any of the matrix combinations, a new matrix
        combination will be created instead. Note that the original matrix
        values will not be overwritten, but added matrix values can be
        overwritten.
        """
        grid_item = ub.udict(mat_item)
        for include_idx, include_item in enumerate(include):
            # Both intersections have the same keys but take their values
            # from the left operand, so they are equal only when the include
            # item agrees with the combination on every shared original key.
            common_orig1 = (mat_item & include_item) & orig_keys
            common_orig2 = (include_item & mat_item) & orig_keys
            if common_orig1 == common_orig2:
                include_idx_to_nvariants[include_idx] += 1
                grid_item = grid_item | include_item
        return grid_item

    def is_excluded(grid_item):
        """
        An excluded configuration only has to be a partial match for it to be
        excluded. For example, the following workflow will run nine jobs: one
        job for each of the 12 configurations, minus the one excluded job that
        matches {os: macos-latest, version: 12, environment: production}, and
        the two excluded jobs that match {os: windows-latest, version: 16}.
        """
        for exclude_item in exclude:
            common1 = exclude_item & grid_item
            if common1:
                common2 = grid_item & exclude_item
                # Excluded only if every key of the exclude item is present
                # in the grid item with the same value.
                if common1 == common2 == exclude_item:
                    return True
        return False

    for mat_item in map(ub.udict, ub.named_product(matrix_)):
        grid_item = include_modifiers(mat_item)
        if not is_excluded(grid_item):
            yield grid_item

    for idx, n in include_idx_to_nvariants.items():
        if n == 0:
            grid_item = include[idx]
            yield grid_item


def extended_github_action_matrix(arg):
    """
    A variant of the github action matrix for our mlops framework that
    overcomes some of the former limitations.

    This keeps the same weird include / exclude semantics, but
    adds an additional "submatrices" component that has the following
    semantics.

    A submatrices entry is a list of dictionaries, but each dictionary may
    have more than one value per key, and each is expanded into a list of
    items, similarly to the matrix itself. In this respect the submatrices
    are "resolved" to a list of dictionary items just like "include". The
    difference is that when the common elements of a submatrix grid item
    match a matrix grid item, it updates it with its new values and yields it
    immediately. Subsequent submatrix grid items can yield different
    variations of this item. The actions include rules are then applied on
    top of this.

    In addition to "submatrices", the numbered groups "submatrices1" through
    "submatrices9" are recognized. The groups are applied one after another,
    so every group has the opportunity to produce variants of the items
    generated by the previous group.

    Matrix and submatrix values that are strings or path-like objects ending
    in ".yaml" or ".yml" are loaded as files and the loaded items are used in
    their place.

    Args:
        arg (Dict | str): See github_action_matrix, but with new submatrices

    Yields:
        item : a single entry in the grid.

    Raises:
        Exception: if unexpected top level keys are given.

    CommandLine:
        xdoctest -m geowatch.utils.util_param_grid extended_github_action_matrix:2

    Example:
        >>> from geowatch.utils.util_param_grid import *  # NOQA
        >>> from geowatch.utils import util_param_grid
        >>> # NOTE(review): the key below is spelled "submatrix", which the
        >>> # code treats as an ordinary matrix parameter - confirm whether
        >>> # "submatrices" was intended.
        >>> arg = ub.codeblock(
                 '''
                   matrix:
                     fruit: [apple, pear]
                     animal: [cat, dog]
                     submatrix:
                       - color: green
                       - color: pink
                         animal: cat
                       - fruit: apple
                         shape: circle
                       - fruit: banana
                       - fruit: banana
                         animal: cat
                 ''')
        >>> grid_items = list(extended_github_action_matrix(arg))
        >>> print('grid_items = {}'.format(ub.urepr(grid_items, nl=1)))

    Example:
        >>> from geowatch.utils.util_param_grid import *  # NOQA
        >>> arg = ub.codeblock(
                '''
                  matrix:
                    os: [macos-latest, windows-latest]
                    version: [12, 14, 16]
                    environment: [staging, production]
                    exclude:
                      - os: macos-latest
                        version: 12
                        environment: production
                      - os: windows-latest
                        version: 16
            ''')
        >>> grid_items = list(extended_github_action_matrix(arg))
        >>> print('grid_items = {}'.format(ub.urepr(grid_items, nl=1)))

    Example:
        >>> from geowatch.utils.util_param_grid import *  # NOQA
        >>> from geowatch.utils import util_param_grid
        >>> # Specifying an explicit list of things to run
        >>> arg = ub.codeblock(
                 '''
                 submatrices:
                    - common_variable: a
                      old_variable: a
                    - common_variable: a
                      old_variable: null
                      new_variable: 1
                    - common_variable: a
                      old_variable: null
                      new_variable: 11
                    - common_variable: a
                      old_variable: null
                      new_variable: 2
                    - common_variable: b
                      old_variable: null
                      new_variable: 22
                 ''')
        >>> grid_items = list(extended_github_action_matrix(arg))
        >>> print('grid_items = {}'.format(ub.urepr(grid_items, nl=1)))
        >>> assert len(grid_items) == 5

    Example:
        >>> from geowatch.utils.util_param_grid import *  # NOQA
        >>> from geowatch.utils import util_param_grid
        >>> arg = ub.codeblock(
                 '''
                 matrix:
                   common_variable:
                       - a
                       - b
                   old_variable:
                       - null
                       - auto
                 submatrices:
                     - old_variable: null
                       new_variable1:
                           - 1
                           - 2
                       new_variable2:
                           - 3
                           - 4
                     - old_variable: null
                       new_variable2:
                           - 33
                           - 44
                     # These wont be used because blag doesn't exist
                     - old_variable: blag
                       new_variable:
                           - 10
                           - 20
                 ''')
        >>> grid_items = list(extended_github_action_matrix(arg))
        >>> print('grid_items = {}'.format(ub.urepr(grid_items, nl=1)))
        >>> assert len(grid_items) == 14

    Example:
        >>> from geowatch.utils.util_param_grid import *  # NOQA
        >>> from geowatch.utils import util_param_grid
        >>> arg = ub.codeblock(
                 '''
                 matrix:
                   step1.src:
                       - dset1
                       - dset2
                       - dset3
                       - dset4
                   step1.resolution:
                       - 10
                       - 20
                       - 30
                 submatrices1:
                    - step1.resolution: 10
                      step2.resolution: [10, 15]
                    - step1.resolution: 20
                      step2.resolution: 20
                 submatrices2:
                    - step1.src: dset1
                      step2.src: big_dset1A
                    - step1.src: dset2
                      step2.src:
                         - big_dset2A
                         - big_dset2B
                    - step1.src: dset3
                      step2.src: big_dset3A
                 ''')
        >>> grid_items = list(extended_github_action_matrix(arg))
        >>> print('grid_items = {}'.format(ub.urepr(grid_items, nl=1)))
        >>> assert len(grid_items) == 20
    """
    import ruamel.yaml
    from kwutil.util_yaml import Yaml
    import os
    if isinstance(arg, str):
        # The module-level ``ruamel.yaml.safe_load`` helper was removed in
        # ruamel.yaml 0.18. The YAML class API is available in both old and
        # new releases.
        data = ruamel.yaml.YAML(typ='safe', pure=True).load(arg)
    else:
        data = arg.copy()

    # Work on copies so the pops below do not modify the caller's dicts.
    matrix = data.pop('matrix', {}).copy()

    # Special keys may be given inside the matrix or next to it. An entry
    # inside the matrix takes precedence.
    include = matrix.pop('include', data.pop('include', []))
    exclude = matrix.pop('exclude', data.pop('exclude', []))

    # Special submatrices for more cartesian products, it would be good to come
    # up with a solution that does not require hard coded and a fixed number of
    # variables.
    submatrix_groups = [
        matrix.pop('submatrices', data.pop('submatrices', []))
    ]
    submatrix_groups.extend(
        matrix.pop(f'submatrices{num}', data.pop(f'submatrices{num}', []))
        for num in range(1, 10)
    )

    include = list(map(ub.udict, include))
    exclude = list(map(ub.udict, exclude))

    def coerce_matrix_value(v):
        # Promote scalars to a list and expand references to yaml files into
        # the items they contain.
        if not ub.iterable(v):
            v = [v]
        final = []
        for item in v:
            if isinstance(item, (str, os.PathLike)) and str(item).endswith(('.yaml', '.yml')):
                # use Yaml.coerce instead?
                final.extend(Yaml.load(item))
            else:
                final.append(item)
        return final

    def expand_submatrix_group(group):
        # Resolve one group of submatrices into a flat list of grid items.
        # A new list is built so the caller's input is left untouched.
        expanded = []
        for submatrix in map(ub.udict, group):
            basis = {k: coerce_matrix_value(v)
                     for k, v in submatrix.items()}
            expanded.extend(map(ub.udict, ub.named_product(basis)))
        return expanded

    # The idea is we effectively want to take the cross product of multiple
    # lists of submatrices.
    multi_submatrices_ = [expand_submatrix_group(group)
                          for group in submatrix_groups]

    if len(data) != 0:
        raise Exception(f'Unexpected top level keys: {list(data.keys())}')

    matrix_ = {k: coerce_matrix_value(v)
               for k, v in matrix.items()}

    orig_keys = set(matrix.keys())
    # Number of grid items each include item was applied to. Items that were
    # never applied are emitted as standalone combinations.
    include_idx_to_nvariants = {idx: 0 for idx in range(len(include))}

    def include_modifiers(mat_item):
        """
        For each object in the include list, the key:value pairs in the object
        will be added to each of the matrix combinations if none of the
        key:value pairs overwrite any of the original matrix values. If the
        object cannot be added to any of the matrix combinations, a new matrix
        combination will be created instead. Note that the original matrix
        values will not be overwritten, but added matrix values can be
        overwritten.
        """
        grid_item = ub.udict(mat_item)
        for include_idx, include_item in enumerate(include):
            # Both intersections have the same keys but take their values
            # from the left operand, so they are equal only when the include
            # item agrees with the combination on every shared original key.
            common_orig1 = (mat_item & include_item) & orig_keys
            common_orig2 = (include_item & mat_item) & orig_keys
            if common_orig1 == common_orig2:
                include_idx_to_nvariants[include_idx] += 1
                grid_item = grid_item | include_item
        return grid_item

    def multisubmatrix_variants(mat_item, multi_submatrices_):
        # Every group of submatrices has the opportunity to modify the item
        # before yielding. The groups are chained lazily.
        curr_items = [mat_item]
        for submatrices_ in multi_submatrices_:
            curr_items = _submatrix_variants_loop(curr_items, submatrices_)
        yield from curr_items

    def _submatrix_variants_loop(mat_items, submatrices_):
        for item in mat_items:
            yield from submatrix_variants(item, submatrices_)

    def submatrix_variants(mat_item, submatrices_):
        """
        Yield one variant of ``mat_item`` for every submatrix item that
        agrees with it on all shared original matrix keys. Each variant is
        ``mat_item`` updated with the values of that submatrix item. If no
        submatrix item matches, ``mat_item`` is yielded unchanged.
        """
        grid_item = ub.udict(mat_item)
        any_modified = False
        for submat_item in submatrices_:
            common_orig1 = (mat_item & submat_item) & orig_keys
            common_orig2 = (submat_item & mat_item) & orig_keys
            if common_orig1 == common_orig2:
                grid_item = mat_item | submat_item
                yield grid_item
                any_modified = True
        if not any_modified:
            yield grid_item

    def is_excluded(grid_item):
        """
        An excluded configuration only has to be a partial match for it to be
        excluded. For example, the following workflow will run nine jobs: one
        job for each of the 12 configurations, minus the one excluded job that
        matches {os: macos-latest, version: 12, environment: production}, and
        the two excluded jobs that match {os: windows-latest, version: 16}.
        """
        for exclude_item in exclude:
            common1 = exclude_item & grid_item
            if common1:
                common2 = grid_item & exclude_item
                # Excluded only if every key of the exclude item is present
                # in the grid item with the same value.
                if common1 == common2 == exclude_item:
                    return True
        return False

    for mat_item in map(ub.udict, ub.named_product(matrix_)):
        for item in multisubmatrix_variants(mat_item, multi_submatrices_):
            item = include_modifiers(item)
            if not is_excluded(item):
                yield item

    for idx, n in include_idx_to_nvariants.items():
        if n == 0:
            grid_item = include[idx]
            yield grid_item