'''
Generates .sumstats and .l2.ldscore/.l2.M files used for simulation testing.

'''
from __future__ import division
import numpy as np
import pandas as pd

N_INDIV = 10000
N_SIMS = 1000
N_SNP = 1000
h21 = 0.3
h22 = 0.6


def print_ld(x, fh, M):
    l2 = '.l2.ldscore'
    m = '.l2.M_5_50'
    x.to_csv(fh + l2, sep='\t', index=False, float_format='%.3f')
    print >>open(fh + m, 'wb'), '\t'.join(map(str, M))

    # chr1
    y = x.iloc[0:int(len(x) / 2), ]
    y.to_csv(fh + '1' + l2, sep='\t', index=False, float_format='%.3f')
    print >>open(fh + '1' + m, 'wb'), '\t'.join((str(x / 2) for x in M))

    # chr2
    y = x.iloc[int(len(x) / 2):len(x), ]
    y.to_csv(fh + '2' + l2, sep='\t', index=False, float_format='%.3f')
    print >>open(fh + '2' + m, 'wb'), '\t'.join((str(x / 2) for x in M))

two_ldsc = np.abs(100 * np.random.normal(size=2 * N_SNP)).reshape((N_SNP, 2))
single_ldsc = np.sum(two_ldsc, axis=1).reshape((N_SNP, 1))
M_two = np.sum(two_ldsc, axis=0)
M = np.sum(single_ldsc)
ld = pd.DataFrame({
    'CHR': np.ones(N_SNP),
    'SNP': ['rs' + str(i) for i in xrange(1000)],
    'BP': np.arange(N_SNP)})

# 2 LD Scores 2 files
split_ldsc = ld.copy()
split_ldsc['LD'] = two_ldsc[:, 0]
print_ld(split_ldsc, 'simulate_test/ldscore/twold_firstfile', [M_two[0]])
split_ldsc = ld.copy()
split_ldsc['LD'] = two_ldsc[:, 1]  # both have same colname to test that this is ok
print_ld(split_ldsc, 'simulate_test/ldscore/twold_secondfile', [M_two[1]])

# 1 LD Score 1 file
ldsc = ld.copy()
ldsc['LD'] = single_ldsc
print_ld(ldsc, 'simulate_test/ldscore/oneld_onefile', [M])

# 2 LD Scores 1 file
ldsc = ld.copy()
ldsc['LD1'] = two_ldsc[:, 0]
ldsc['LD2'] = two_ldsc[:, 1]
print_ld(ldsc, 'simulate_test/ldscore/twold_onefile', M_two)

# Weight LD Scores
w_ld = ld.copy()
w_ld['LD'] = np.ones(N_SNP)
w_ld.to_csv('simulate_test/ldscore/w.l2.ldscore',
            index=False, sep='\t', float_format='%.3f')
# split across chromosomes
df = pd.DataFrame({
    'SNP': ['rs' + str(i) for i in xrange(1000)],
    'A1': ['A' for _ in xrange(1000)],
    'A2': ['G' for _ in xrange(1000)],
    'N': np.ones(1000) * N_INDIV
})
for i in xrange(N_SIMS):
    z = np.random.normal(size=N_SNP).reshape((N_SNP,))
    c = np.sqrt(
        1 + N_INDIV * (h21 * two_ldsc[:, 0] / float(M_two[0]) + h22 * two_ldsc[:, 1] / float(M_two[1])))
    z = np.multiply(z, c)
    dfi = df.copy()
    dfi['Z'] = z
    dfi.reindex(np.random.permutation(dfi.index))
    dfi.to_csv('simulate_test/sumstats/' + str(i),
               sep='\t', index=False, float_format='%.3f')