import numpy as np

from .common import TYPES1, Benchmark, get_indexes_rand, get_squares_


class Eindot(Benchmark):
    def setup(self):
        self.a = np.arange(60000.0).reshape(150, 400)
        self.ac = self.a.copy()
        self.at = self.a.T
        self.atc = self.a.T.copy()
        self.b = np.arange(240000.0).reshape(400, 600)
        self.c = np.arange(600)
        self.d = np.arange(400)

        self.a3 = np.arange(480000.).reshape(60, 80, 100)
        self.b3 = np.arange(192000.).reshape(80, 60, 40)

    def time_dot_a_b(self):
        np.dot(self.a, self.b)

    def time_dot_d_dot_b_c(self):
        np.dot(self.d, np.dot(self.b, self.c))

    def time_dot_trans_a_at(self):
        np.dot(self.a, self.at)

    def time_dot_trans_a_atc(self):
        np.dot(self.a, self.atc)

    def time_dot_trans_at_a(self):
        np.dot(self.at, self.a)

    def time_dot_trans_atc_a(self):
        np.dot(self.atc, self.a)

    def time_einsum_i_ij_j(self):
        np.einsum('i,ij,j', self.d, self.b, self.c)

    def time_einsum_ij_jk_a_b(self):
        np.einsum('ij,jk', self.a, self.b)

    def time_einsum_ijk_jil_kl(self):
        np.einsum('ijk,jil->kl', self.a3, self.b3)

    def time_inner_trans_a_a(self):
        np.inner(self.a, self.a)

    def time_inner_trans_a_ac(self):
        np.inner(self.a, self.ac)

    def time_matmul_a_b(self):
        np.matmul(self.a, self.b)

    def time_matmul_d_matmul_b_c(self):
        np.matmul(self.d, np.matmul(self.b, self.c))

    def time_matmul_trans_a_at(self):
        np.matmul(self.a, self.at)

    def time_matmul_trans_a_atc(self):
        np.matmul(self.a, self.atc)

    def time_matmul_trans_at_a(self):
        np.matmul(self.at, self.a)

    def time_matmul_trans_atc_a(self):
        np.matmul(self.atc, self.a)

    def time_tensordot_a_b_axes_1_0_0_1(self):
        np.tensordot(self.a3, self.b3, axes=([1, 0], [0, 1]))


class Linalg(Benchmark):
    params = sorted(set(TYPES1) - {'float16'})
    param_names = ['dtype']

    def setup(self, typename):
        np.seterr(all='ignore')
        self.a = get_squares_()[typename]

    def time_svd(self, typename):
        np.linalg.svd(self.a)

    def time_pinv(self, typename):
        np.linalg.pinv(self.a)

    def time_det(self, typename):
        np.linalg.det(self.a)


class LinalgNorm(Benchmark):
    params = TYPES1
    param_names = ['dtype']

    def setup(self, typename):
        self.a = get_squares_()[typename]

    def time_norm(self, typename):
        np.linalg.norm(self.a)


class LinalgSmallArrays(Benchmark):
    """ Test overhead of linalg methods for small arrays """
    def setup(self):
        self.array_3_3 = np.eye(3) + np.arange(9.).reshape((3, 3))
        self.array_3 = np.arange(3.)
        self.array_5 = np.arange(5.)
        self.array_5_5 = np.reshape(np.arange(25.), (5, 5))

    def time_norm_small_array(self):
        np.linalg.norm(self.array_5)

    def time_det_small_array(self):
        np.linalg.det(self.array_5_5)

    def time_det_3x3(self):
        np.linalg.det(self.array_3_3)

    def time_solve_3x3(self):
        np.linalg.solve(self.array_3_3, self.array_3)

    def time_eig_3x3(self):
        np.linalg.eig(self.array_3_3)


class Lstsq(Benchmark):
    def setup(self):
        self.a = get_squares_()['float64']
        self.b = get_indexes_rand()[:100].astype(np.float64)

    def time_numpy_linalg_lstsq_a__b_float64(self):
        np.linalg.lstsq(self.a, self.b, rcond=-1)

class Einsum(Benchmark):
    param_names = ['dtype']
    params = [[np.float32, np.float64]]

    def setup(self, dtype):
        self.one_dim_small = np.arange(600, dtype=dtype)
        self.one_dim = np.arange(3000, dtype=dtype)
        self.one_dim_big = np.arange(480000, dtype=dtype)
        self.two_dim_small = np.arange(1200, dtype=dtype).reshape(30, 40)
        self.two_dim = np.arange(240000, dtype=dtype).reshape(400, 600)
        self.three_dim_small = np.arange(10000, dtype=dtype).reshape(10, 100, 10)
        self.three_dim = np.arange(24000, dtype=dtype).reshape(20, 30, 40)
        # non_contiguous arrays
        self.non_contiguous_dim1_small = np.arange(1, 80, 2, dtype=dtype)
        self.non_contiguous_dim1 = np.arange(1, 4000, 2, dtype=dtype)
        self.non_contiguous_dim2 = np.arange(1, 2400, 2, dtype=dtype).reshape(30, 40)

        non_contiguous_dim3 = np.arange(1, 48000, 2, dtype=dtype)
        self.non_contiguous_dim3 = non_contiguous_dim3.reshape(20, 30, 40)

    # outer(a,b): trigger sum_of_products_contig_stride0_outcontig_two
    def time_einsum_outer(self, dtype):
        np.einsum("i,j", self.one_dim, self.one_dim, optimize=True)

    # multiply(a, b):trigger sum_of_products_contig_two
    def time_einsum_multiply(self, dtype):
        np.einsum("..., ...", self.two_dim_small, self.three_dim, optimize=True)

    # sum and multiply:trigger sum_of_products_contig_stride0_outstride0_two
    def time_einsum_sum_mul(self, dtype):
        np.einsum(",i...->", 300, self.three_dim_small, optimize=True)

    # sum and multiply:trigger sum_of_products_stride0_contig_outstride0_two
    def time_einsum_sum_mul2(self, dtype):
        np.einsum("i...,->", self.three_dim_small, 300, optimize=True)

    # scalar mul: trigger sum_of_products_stride0_contig_outcontig_two
    def time_einsum_mul(self, dtype):
        np.einsum("i,->i", self.one_dim_big, 300, optimize=True)

    # trigger contig_contig_outstride0_two
    def time_einsum_contig_contig(self, dtype):
        np.einsum("ji,i->", self.two_dim, self.one_dim_small, optimize=True)

    # trigger sum_of_products_contig_outstride0_one
    def time_einsum_contig_outstride0(self, dtype):
        np.einsum("i->", self.one_dim_big, optimize=True)

    # outer(a,b): non_contiguous arrays
    def time_einsum_noncon_outer(self, dtype):
        np.einsum("i,j", self.non_contiguous_dim1,
                  self.non_contiguous_dim1, optimize=True)

    # multiply(a, b):non_contiguous arrays
    def time_einsum_noncon_multiply(self, dtype):
        np.einsum("..., ...", self.non_contiguous_dim2,
                  self.non_contiguous_dim3, optimize=True)

    # sum and multiply:non_contiguous arrays
    def time_einsum_noncon_sum_mul(self, dtype):
        np.einsum(",i...->", 300, self.non_contiguous_dim3, optimize=True)

    # sum and multiply:non_contiguous arrays
    def time_einsum_noncon_sum_mul2(self, dtype):
        np.einsum("i...,->", self.non_contiguous_dim3, 300, optimize=True)

    # scalar mul: non_contiguous arrays
    def time_einsum_noncon_mul(self, dtype):
        np.einsum("i,->i", self.non_contiguous_dim1, 300, optimize=True)

    # contig_contig_outstride0_two: non_contiguous arrays
    def time_einsum_noncon_contig_contig(self, dtype):
        np.einsum("ji,i->", self.non_contiguous_dim2,
                  self.non_contiguous_dim1_small, optimize=True)

    # sum_of_products_contig_outstride0_one: non_contiguous arrays
    def time_einsum_noncon_contig_outstride0(self, dtype):
        np.einsum("i->", self.non_contiguous_dim1, optimize=True)


class LinAlgTransposeVdot(Benchmark):
    # Smaller for speed
    # , (128, 128), (256, 256), (512, 512),
    # (1024, 1024)
    params = [[(16, 16), (32, 32),
               (64, 64)], TYPES1]
    param_names = ['shape', 'npdtypes']

    def setup(self, shape, npdtypes):
        self.xarg = np.random.uniform(-1, 1, np.dot(*shape)).reshape(shape)
        self.xarg = self.xarg.astype(npdtypes)
        self.x2arg = np.random.uniform(-1, 1, np.dot(*shape)).reshape(shape)
        self.x2arg = self.x2arg.astype(npdtypes)
        if npdtypes.startswith('complex'):
            self.xarg += self.xarg.T * 1j
            self.x2arg += self.x2arg.T * 1j

    def time_transpose(self, shape, npdtypes):
        np.transpose(self.xarg)

    def time_vdot(self, shape, npdtypes):
        np.vdot(self.xarg, self.x2arg)


class MatmulStrided(Benchmark):
    # some interesting points selected from
    # https://github.com/numpy/numpy/pull/23752#issuecomment-2629521597
    # (m, p, n, batch_size)
    args = [
        (2, 2, 2, 1), (2, 2, 2, 10), (5, 5, 5, 1), (5, 5, 5, 10),
        (10, 10, 10, 1), (10, 10, 10, 10), (20, 20, 20, 1), (20, 20, 20, 10),
        (50, 50, 50, 1), (50, 50, 50, 10),
        (150, 150, 100, 1), (150, 150, 100, 10),
        (400, 400, 100, 1), (400, 400, 100, 10)
    ]

    param_names = ['configuration']

    def __init__(self):
        self.args_map = {
            'matmul_m%03d_p%03d_n%03d_bs%02d' % arg: arg for arg in self.args
        }

        self.params = [list(self.args_map.keys())]

    def setup(self, configuration):
        m, p, n, batch_size = self.args_map[configuration]

        self.a1raw = np.random.rand(batch_size * m * 2 * n).reshape(
            (batch_size, m, 2 * n)
        )

        self.a1 = self.a1raw[:, :, ::2]

        self.a2 = np.random.rand(batch_size * n * p).reshape(
            (batch_size, n, p)
        )

    def time_matmul(self, configuration):
        return np.matmul(self.a1, self.a2)