# Simulation of mutated pathways based on real data

In [0]:
import community
import networkx as nx
import matplotlib.pyplot as plt

In [0]:
from scipy.io import loadmat

dataFolder = 'data/'

# Patients' somatic mutation profiles. Sample ids are cut to their first
# 12 characters before being looked up among the phenotype patient ids.
somatic = loadmat(dataFolder + 'somatic_data_UCEC.mat')
samples_id = [entry[0][0][:12] for entry in somatic['sample_id']]

# Patients' full phenotypes: a MATLAB struct whose fields are read by position.
phenotypes = loadmat(dataFolder + 'UCEC_clinical_phenotype.mat')
pheno_fields = phenotypes['UCECppheno'][0][0]
patients = [cell[0][0] for cell in pheno_fields[0]]

# Position of every mutation sample inside the phenotype table; computed once
# and shared by all phenotype columns (raises ValueError on an unknown sample).
sample_rows = [patients.index(sample) for sample in samples_id]

# Field 10 -> `cancer`, field 17 -> `grade`, both re-ordered to follow samples_id.
tmp = [cell[0][0] for cell in pheno_fields[10]]
cancer = [tmp[row] for row in sample_rows]
tmp = [cell[0][0] for cell in pheno_fields[17]]
grade = [tmp[row] for row in sample_rows]

# Adjacency matrix of the network.
network = loadmat(dataFolder + 'adj_mat.mat')
net = network['adj_mat']
net.shape  # bare expression: shows nothing unless it is the last line of a cell

# Correspondence between the adjacency matrix row numbers and Entrez ids.
entrez_to_idmat = loadmat(dataFolder + 'entrez_to_idmat.mat')
keys = [entry[0] for entry in entrez_to_idmat['keymat'][0]]
ids = [entry[0][0] for entry in entrez_to_idmat['entrezid'][0]]

# Mutation matrix and the gene ids stored alongside it in the somatic data file
# (presumably one gene id per mutation-matrix column -- TODO confirm).
mutations = somatic['gene_indiv_mat']
mutations.shape  # bare expression, see above
genes = [record[0] for record in somatic['gene_id_all']]

import numpy as np

# Look every mutated gene up in the network's Entrez id list.
l = []       # per gene: its position in `ids`, or NaN when the gene is not listed
subnet = []  # positions in `ids` of the genes that were found (used to slice `net`)
good = []    # positions in `genes` of the genes that were found
bad = []     # positions in `genes` of the genes that were NOT found
for j, g in enumerate(genes):
    try:
        i = ids.index(g)
    except ValueError:
        # list.index raises ValueError for a missing item; anything else
        # (e.g. KeyboardInterrupt) should not be silently swallowed.
        i = np.nan
        bad.append(j)
    else:
        subnet.append(i)
        good.append(j)
    l.append(i)

# Restrict the adjacency matrix to the genes that were found, then remove the
# diagonal (self-loops). The dense copy is built once and reused.
nnet = net[subnet][:, subnet]
dense = nnet.todense()
nnet = dense - np.diag(np.diag(dense))

# Pad with all-zero rows/columns for the genes missing from the network, so
# they end up as isolated nodes. Resulting row order: first the genes listed
# in `good` (in that order), then the genes listed in `bad`.
n_found = nnet.shape[0]
n_missing = len(bad)
nnnet = np.bmat([
    [np.matrix(nnet), np.matrix(np.zeros([n_found, n_missing]))],
    [np.matrix(np.zeros([n_missing, n_found])), np.matrix(np.zeros([n_missing, n_missing]))],
])


In [0]:
# Display the padded adjacency matrix as this cell's output.
nnnet

In [0]:
# Build the networkx graph from the padded adjacency matrix.
# NOTE(review): from_numpy_matrix was removed in networkx 3.0 (replaced by
# from_numpy_array) -- confirm against the installed networkx version.
PPI = nx.from_numpy_matrix(nnnet)

In [0]:
# Partition the graph into communities. The result is used below as a mapping
# node -> community label (via .keys() / .values()).
# NOTE(review): `community` is presumably the python-louvain package -- confirm.
communities = community.best_partition(PPI)

In [0]:
# Drawing: nodes are coloured with one grey level per community, then all
# edges are drawn semi-transparent.
labels = set(communities.values())
size = float(len(labels))
pos = nx.spring_layout(PPI)

# Group the nodes by community in a single pass instead of rescanning the
# whole partition once per community (there can be many singleton communities).
nodes_by_com = {}
for node, label in communities.items():
    nodes_by_com.setdefault(label, []).append(node)

count = 0.
for com in labels:
    count = count + 1.
    list_nodes = nodes_by_com[com]
    # count / size lies in (0, 1]; passed as a string it is meant as a grey level.
    nx.draw_networkx_nodes(PPI, pos, list_nodes, node_size=20,
                           node_color=str(count / size))

nx.draw_networkx_edges(PPI, pos, alpha=0.5)
plt.show()

In [0]:
from collections import Counter

# Size of every community, computed in one pass over the partition instead of
# rescanning all nodes once per community.
nodes_per_com = Counter(communities.values())

singletons = 0  # number of communities holding a single node
sizeComs = []   # community sizes, in the iteration order of the label set
for com in set(communities.values()):
    sizeCom = nodes_per_com[com]
    sizeComs.append(sizeCom)
    if sizeCom > 1:
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print("%s %s" % (com, sizeCom))
    else:
        singletons = singletons + 1
print("singletons %s" % singletons)

In [0]:
from collections import Counter

# Histogram of community sizes: one "<size> <number of communities>" line per
# distinct size, in increasing size order. Counted in a single pass.
for s, n_coms in sorted(Counter(sizeComs).items()):
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("%s %s" % (s, n_coms))

In [0]:
# Compute the community dendrogram; it is indexed by level in the next cell
# through community.partition_at_level.
# NOTE(review): newer python-louvain releases spell this function
# generate_dendrogram -- confirm against the installed version.
dendo = community.generate_dendogram(PPI)

In [0]:
# Report the number of distinct communities at every dendrogram level except
# the last one.
for level in range(len(dendo) - 1):
    tmp = community.partition_at_level(dendo, level)
    coms = set(tmp.values())
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("partition at level %s is %s" % (level, len(coms)))