import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
from scipy.stats import gaussian_kde
from pubmed_landscape_src.exploration import find_mask_words

def plot_square(coordinates, lw=1, c='r'):
    """Draw the outline of one rectangular region on the current pyplot axes.

    The rectangle is drawn as four separate line segments with `plt.plot`.

    Parameters
    ----------
    coordinates : sequence of 4 numbers
        Limits of the region in the order [top, bottom, left, right].
        Only the first four entries are read.
    lw : float, default=1
        Linewidth of the outline.
    c : str, default= 'r'
        Color of the outline.

    Raises
    ------
    AssertionError
        If `bottom` is not smaller than `top`, or `left` is not smaller
        than `right`.

    """

    y_top, y_bottom, x_left, x_right = (coordinates[k] for k in range(4))

    assert y_bottom < y_top, "Bottom and top coordinates given in the wrong order."
    assert x_left < x_right, "Left and right coordinates given in the wrong order."

    # Segments in drawing order: top edge, bottom edge, left edge, right edge.
    edges = (
        ([x_left, x_right], [y_top, y_top]),
        ([x_left, x_right], [y_bottom, y_bottom]),
        ([x_left, x_left], [y_bottom, y_top]),
        ([x_right, x_right], [y_bottom, y_top]),
    )
    for edge_x, edge_y in edges:
        plt.plot(edge_x, edge_y, linewidth=lw, c=c)


def plot_region_names(coordinates, ax, labels, fontsize=5):
    """Write the name of every region next to its rectangle.

    Each label is split on whitespace and its words are stacked vertically,
    one `ax.text` call per word, with the last word drawn lowest. Where the
    text is anchored depends on the 1-based position of the region in
    `labels`; the per-region rules are hard-coded below.

    Parameters
    ----------
    coordinates : array of shape (n_regions, 4)
        Coordinates of the region limits given in the order
        [top, bottom, left, right]. Must support 2D slicing (`[:, k]`).
    ax :  Axes
        Axes on which the text is drawn.
    labels : list of str
        Labels for each region.
    fontsize : float, default=5
        Font size; also scales the vertical distance between stacked words.
    """

    tops, bottoms, lefts, rights = (coordinates[:, k] for k in range(4))

    # Words of each label, last word first (the stack is built bottom-up).
    stacked_words = [lbl.split()[::-1] for lbl in labels]

    line_spacing = 2.5

    for idx, words in enumerate(stacked_words):
        region = idx + 1
        for level, word in enumerate(words):
            rise = fontsize * level * line_spacing

            if region == 9:
                # left of the box, vertically centred, nudged up by 5
                x = lefts[idx] - 3
                y = (tops[idx] + bottoms[idx]) / 2 + rise + 5
                va, ha = 'top', 'right'
            elif region in (7, 8):
                # left of the box, vertically centred
                x = lefts[idx] - 3
                y = (tops[idx] + bottoms[idx]) / 2 + rise
                va, ha = 'top', 'right'
            elif region in (6, 10, 11):
                # starting at the top-right corner
                x = rights[idx]
                y = tops[idx] + rise
                va, ha = 'bottom', 'left'
            elif region == 12:
                # below the box, shifted left from the right edge
                x = rights[idx] - 35
                y = bottoms[idx] - 12 + rise
                va, ha = 'top', 'left'
            elif region == 4:
                # right of the box, vertically centred
                x = rights[idx] + 3
                y = (tops[idx] + bottoms[idx]) / 2 + rise
                va, ha = 'top', 'left'
            elif region == 5:
                # above the box, ending at the right edge
                x = rights[idx]
                y = tops[idx] + rise + 1
                va, ha = 'bottom', 'right'
            else:
                # default: centred above the box
                x = (lefts[idx] + rights[idx]) / 2
                y = tops[idx] + rise + 1
                va, ha = 'bottom', 'center'

            ax.text(x, y, word, va=va, ha=ha, fontsize=fontsize, c='k')
                
                
def plot_region_numbers(coordinates, ax, labels, region_labels_numbers, fontsize=5):
    """Add region numbers to a plot defining regions.
    The regions are defined by the `coordinates` ([top, bottom, left, right]).
    Each number is written in red with its top-left corner anchored at the
    bottom-right corner of the corresponding region.

    Parameters
    ----------
    coordinates : array-like of shape (n_regions, 4)
        Coordinates of the region limits given in the order [top, bottom, left, right].
    ax :  Axes
        Axes on which the numbers are drawn.
    labels : list of str
        Labels for each region, in the same order as the rows of `coordinates`.
    region_labels_numbers : dict
        Dictionary mapping region labels to numbers.
    fontsize : float, default=5
        Font size of the numbers.
    """

    coordinates = np.asarray(coordinates)

    for i, label in enumerate(labels):
        # Draw on the axes that was passed in and honour `fontsize`, rather
        # than relying on pyplot's current axes and a fixed size.
        ax.text(coordinates[i, 3], coordinates[i, 1], region_labels_numbers[label],
                va='top', ha='left', fontsize=fontsize, c='r')
        
        
def automatic_coloring(journals, words_may, words_min, list_colors):
    """ Creates coloring based on words appearing in a list of documents.
    It creates an array with colors, assigning a color to each paper depending on whether it contains a word in its journal title from the lists of `words_may` and `words_min` or not. The colors that will be assigned are given in `list_colors`. Papers matching none of the words are colored 'lightgrey'.
    
    IMPORTANT REMARK: if the journal name contains two words belonging to the word list, the color of the word
    located the latest in the list will be assigned to it (first, the first word's color is assigned and then 
    the second overwrites the first).
    
    Parameters
    ----------
    journals : pandas Series of str
        Journal names of the papers, or any other corpus where to look for the words.
        Missing values are treated as containing no word.
    words_may : list of str
        List of the words to look for, starting with capital letter.
    words_min : list of str
        List of the words to look for, starting with small letters. Must be at
        least as long as `words_may`; entry `i` is the variant of `words_may[i]`.
    list_colors : list of str
        List of all the unique colors to assign. Must be at least as long as
        `words_may`; entry `i` is the color of `words_may[i]`.
    
    Returns
    -------
    word_colors : dict
        Legend of word-colors (which color has each word), keyed by the
        capitalized words.
    journal_colors : array of shape (n_papers,), dtype object
        Colors for each paper.
        
    See Also
    --------
    improved_coloring 
    
    """

    word_colors = {}

    # Start with every paper grey and overwrite matches word by word, so that
    # later words take precedence and empty input simply yields an empty array.
    journal_colors = np.full(len(journals), 'lightgrey', dtype=object)

    for i, word_upper in enumerate(words_may):
        word_lower = words_min[i]
        color = list_colors[i]

        # legend word-color, for informative purpose
        word_colors[word_upper] = color

        # plain substring match of either spelling
        contains_word = (
            journals.str.contains(word_lower, regex=False, na=False)
            | journals.str.contains(word_upper, regex=False, na=False)
        )
        journal_colors[contains_word.to_numpy(dtype=bool)] = color

    return word_colors, journal_colors
    

def improved_coloring(journals, dict_words_colors):
    """ Creates coloring based on words appearing in a list of documents.
    It creates an array with colors, assigning a color to each paper depending on whether it contains a word in its journal title from the keys in `dict_words_colors`. 
    
    A word matches a journal name if the name contains the capitalized word
    (`word.capitalize()`) anywhere, or the word exactly as given preceded by
    a space. Papers matching no word get the label 'unlabeled' and the color
    'lightgrey'.
    
    IMPORTANT REMARK: if the journal name contains two words belonging to the word list, the color of the word
    located the latest in the list will be assigned to it (first, the first word's color is assigned and then 
    the second overwrites the first).
    
    Parameters
    ----------
    journals : pandas Series of str
        Journal names of the papers, or any other corpus where to look for the words.
        Missing values are treated as containing no word.
    dict_words_colors : dict
        Dictionary matching words to colors (legend). The keys are the words and the values are the colors.
    
    
    Returns
    -------
    labels_with_unlabeled : array of str of shape (n_journals,)
        Labels (words) for all instances including label 'unlabeled'.
    colors : array of shape (n_journals,), dtype object
        Colors for each paper.
            
    See Also
    --------
    automatic_coloring
    
    """

    n_papers = len(journals)

    # Explicit defaults: whether a paper is unlabeled is decided by the match
    # itself, not by its color, so a legend entry that happens to use
    # 'lightgrey' keeps its label.
    labels = np.full(n_papers, 'unlabeled', dtype=object)
    colors = np.full(n_papers, 'lightgrey', dtype=object)

    for word, color in dict_words_colors.items():
        word_may = word.capitalize()
        word_min = ' ' + word

        matches = (
            journals.str.contains(word_may, regex=False, na=False)
            | journals.str.contains(word_min, regex=False, na=False)
        ).to_numpy(dtype=bool)

        labels[matches] = word
        colors[matches] = color

    labels_with_unlabeled = labels.astype(str)

    return labels_with_unlabeled, colors


def years_coloring(dates, years, dic):
    """Creates colors based on years of publication.
    
    A paper is assigned a year if its date string contains that year as a
    substring. If several years match, the one listed last in `years` wins.
    
    Parameters
    ----------
    dates : pandas Series of str
        The dataframe column with the date of the paper. Missing values are
        treated as containing no year.
    years: array-like
        A list of all unique years as strings.
    dic: dict
        A dictionary where you have for each year (numeric key) a value in
        between 0 and 1 for the colormap.
    
    Returns
    -------
    date_colors : array of float of shape (n_papers,)
        Colormap value for each paper; NaN where the paper's year is not a
        key of `dic` (including papers for which no year was found).
    date_year : array of float of shape (n_papers,)
        The year contained in the str date of every paper; 0 where none of
        `years` was found.
    
    """

    date_year = np.zeros(len(dates))

    for year in years:
        has_year = dates.str.contains(year, regex=False, na=False).to_numpy(dtype=bool)
        date_year[has_year] = int(year)

    # Always a float array, whatever the first paper is, and valid for empty
    # input; years missing from `dic` become NaN.
    date_colors = np.array([dic.get(year, np.nan) for year in date_year], dtype=float)

    return date_colors, date_year


def find_cluster_center(tsne, colors, legend, subset = True, subset_size = 500000, rs = 42):
    """Find cluster centers.
    Finds coordinates of the highest density point of points from each label, using gaussian_kde.
    The center of a label is the point of that label at which the kernel
    density estimate fitted to the label's points is highest.
    
    Parameters
    ----------
    tsne: array of shape (n_points,2)
        t-SNE coordinates.
    colors : array of shape (n_points,)
        Color of each point; points belong to a label if their color equals
        the label's color in `legend`.
    legend : dict
        Legend label-color.
    subset : bool, default= True
         If True, a subset of the dataset is used for the cluster center calculations.
    subset_size : int, default=500000
        Size of the subset of the dataset used for the cluster center calculations.
        The subset is drawn with replacement and must not be larger than the dataset.
    rs : int, default= 42
         Random seed used to draw the subset.
         
    Returns
    -------
    center_cluster_coordinates_df : dataframe of shape (n_clusters, 2)
        Cluster center coordinates stored in two columns: "x" and "y",
        indexed by label.
    
    Raises
    ------
    AssertionError
        If `subset` is True and `subset_size` exceeds the number of points,
        or if a label of `legend` has no points in the (sub)set.

    """
    
    if subset:
        n_points = tsne.shape[0]
        assert n_points >= subset_size, "Subset size is larger than the dataset"
        # A private generator draws the same indices as seeding the global
        # one would, without resetting NumPy's global random state.
        rng = np.random.RandomState(rs)
        index_subset = rng.randint(0, n_points, subset_size)
        tsne_subset = tsne[index_subset, :]
        colors_subset = colors[index_subset]
    else:
        tsne_subset = tsne
        colors_subset = colors
    
    words = list(legend.keys())
    
    if not words:
        # nothing to locate: return an empty frame with the usual columns
        return pd.DataFrame(np.empty((0, 2)), index=words, columns=['x', 'y'])
    
    # calculate cluster centers
    center_cluster_coordinates = []
    for word in words:
        color = legend[word]
        cluster = tsne_subset[colors_subset == color]
        assert cluster.shape[0] > 0, "No points found for label %r (color %r)" % (word, color)
        # center = cluster point with the highest kernel density
        kde = gaussian_kde(cluster.T)
        center_cluster_coordinates.append(cluster[kde(cluster.T).argmax()])
    
    center_cluster_coordinates = np.vstack(center_cluster_coordinates)
    
    center_cluster_coordinates_df = pd.DataFrame(center_cluster_coordinates, index = words, columns = ['x', 'y'])
    
    return center_cluster_coordinates_df


def plot_label_tags(tsne, colors, legend, x_lim, y_lim, ax=None, middle_value = 0, subset = True, subset_size = 500000, rs = 42, fontsize=7, capitalize=True):
    """Plots label tags and a line pointing to the embedding.
    The line from a label tag points to the location with higher points density of that specific label.
    
    Labels whose cluster center has x < `middle_value` are listed along the
    left x-limit, the others along the right x-limit. On each side the tags
    are spread evenly from the top to the bottom y-limit, ordered by the y
    coordinate of their cluster center (highest first).
    
    Parameters
    ----------
    tsne: array-like of shape (n_points,2)
        t-SNE coordinates.
    colors : array-like of shape (n_points,)
        Color values for the colormap.
    legend : dict
        Legend label-color.
    x_lim : tuple (left, right)
        Limits of the x-axis.
    y_lim : tuple (bottom, top)
        Limits of the y-axis.
    ax : axes, optional
        Axes where to draw the figure. If ax=None, axes will be created. 
    middle_value : float, default=0
         The x value to decide which labels go to the left and which go to the right.
    subset : bool, default= True
         If True, a subset of the dataset is used for the cluster center calculations.
    subset_size : int, default=500000
        Size of the subset of the dataset used for the cluster center calculations.
    rs : int, default= 42
         Random seed.
    fontsize: int, default=7
         Fontsize for the labels.
    capitalize : bool, default = True
        If True, it will capitalize the labels.
    
    See Also
    --------
    find_cluster_center
    
    """
    
    assert x_lim[0] < x_lim[1], "xlim values are in the wrong order"
    assert y_lim[0] < y_lim[1], "ylim values are in the wrong order"
    
    if ax is None:
        fig, ax = plt.subplots()

    # Tag background colors on which the text is written in light grey
    # instead of black.
    dark_colors = ('black', '#0000A6', '#5A0007', '#4A3B53', '#1B4400',
                   '#004D43', '#013349', '#000035', '#300018', '#001E09',
                   '#372101', '#6508ba')

    # calculate cluster centers
    center_cluster_coordinates = find_cluster_center(tsne, colors, legend, subset, subset_size, rs)

    def draw_side(side_centers, x_tag, ha):
        """Draw the tags of one side at x=`x_tag` with their pointer lines."""
        side_centers = side_centers.sort_values(by='y', ascending=False)
        y_tags = np.linspace(y_lim[1], y_lim[0], len(side_centers))
        # Plain arrays give purely positional access; the frame is indexed
        # by label, so integer lookups on its columns would be ambiguous.
        center_x = side_centers['x'].to_numpy()
        center_y = side_centers['y'].to_numpy()

        for pos, label in enumerate(side_centers.index):
            colr = legend[label]
            text = label.capitalize() if capitalize else label
            text_color = 'lightgrey' if colr in dark_colors else 'black'

            ax.text(x_tag, y_tags[pos], text, c=text_color, fontsize=fontsize, ha=ha,
                    bbox=dict(facecolor=colr, edgecolor='None', alpha=0.8, boxstyle='square', pad=0.05))
            ax.plot([x_tag, center_x[pos]], [y_tags[pos], center_y[pos]],
                    c=colr, linewidth=0.4, clip_on=False)

    on_left = center_cluster_coordinates.x < middle_value

    # left tags are right-aligned against the left limit, and vice versa
    draw_side(center_cluster_coordinates[on_left], x_lim[0], 'right')
    draw_side(center_cluster_coordinates[~on_left], x_lim[1], 'left')
            
            
def plot_nsc_label_tags(tsne, colors, legend, x_lim, y_lim, ax=None, middle_value = 0, subset = True, subset_size = 500000, rs = 42, fontsize=7, capitalize=True):
    """Plots label tags and a line pointing to the embedding.
    The line from a label tag points to the location with higher points density of that specific label.
    
    Labels are split into left/right by the x coordinate of their cluster
    center (`middle_value`) and then into a top and a bottom group by
    hard-coded y thresholds (left: y > 25 / y <= -50; right: y > 0 /
    y <= -75). Labels whose center lies between the two thresholds of their
    side are not drawn. A few tags ('Sclerosis', 'Sensory', 'Retina',
    'Olfactory', 'Alzheimer') have manually adjusted y positions; these
    adjustments match the displayed (capitalized) text.
    
    Parameters
    ----------
    tsne: array-like of shape (n_points,2)
        t-SNE coordinates.
    colors : array-like of shape (n_points,)
        Color values for the colormap.
    legend : dict
        Legend label-color.
    x_lim : tuple (left, right)
        Limits of the x-axis.
    y_lim : tuple (bottom, top)
        Limits of the y-axis.
    ax : axes, optional
        Axes where to draw the figure. If ax=None, axes will be created. 
    middle_value : float, default=0
         The x value to decide which labels go to the left and which go to the right.
    subset : bool, default= True
         If True, a subset of the dataset is used for the cluster center calculations.
    subset_size : int, default=500000
        Size of the subset of the dataset used for the cluster center calculations.
    rs : int, default= 42
         Random seed.
    fontsize: int, default=7
         fontsize for the labels.
    capitalize : bool, default = True
        If True, it will capitalize the labels.
    
    See Also
    --------
    find_cluster_center
    
    """
    
    assert x_lim[0] < x_lim[1], "xlim values are in the wrong order"
    assert y_lim[0] < y_lim[1], "ylim values are in the wrong order"
    
    if ax is None:
        fig, ax = plt.subplots()

    # Tag background colors on which the text is written in light grey
    # instead of black.
    dark_colors = ('black', '#0000A6', '#5A0007', '#4A3B53', '#1B4400',
                   '#004D43', '#013349', '#000035', '#300018', '#001E09',
                   '#372101', '#6508ba', '#6F0062')

    # y thresholds separating the top and bottom tag groups on each side
    y_lim_top_left = 25
    y_lim_bottom_left = -50
    y_lim_top_right = 0
    y_lim_bottom_right = -75

    # calculate cluster centers
    center_cluster_coordinates = find_cluster_center(tsne, colors, legend, subset, subset_size, rs)

    # split by x, then order each side from highest to lowest center
    on_left = center_cluster_coordinates.x < middle_value
    centers_left = center_cluster_coordinates[on_left].sort_values(by='y', ascending=False)
    centers_right = center_cluster_coordinates[~on_left].sort_values(by='y', ascending=False)

    def draw_group(group, x_tag, y_first, y_last, ha, fixed_y=None, shift_y=None):
        """Draw one group of tags at x=`x_tag`, spread from `y_first` to `y_last`.

        `fixed_y` maps a displayed text to an absolute y position and
        `shift_y` maps a displayed text to an offset added to its y position.
        """
        fixed_y = {} if fixed_y is None else fixed_y
        shift_y = {} if shift_y is None else shift_y

        y_tags = np.linspace(y_first, y_last, len(group))
        # Plain arrays give purely positional access; the frame is indexed
        # by label, so integer lookups on its columns would be ambiguous.
        center_x = group['x'].to_numpy()
        center_y = group['y'].to_numpy()

        for pos, label in enumerate(group.index):
            colr = legend[label]
            text = label.capitalize() if capitalize else label
            y_tag = fixed_y.get(text, y_tags[pos]) + shift_y.get(text, 0)
            text_color = 'lightgrey' if colr in dark_colors else 'black'

            ax.text(x_tag, y_tag, text, c=text_color, fontsize=fontsize, ha=ha,
                    bbox=dict(facecolor=colr, edgecolor='None', alpha=0.8, boxstyle='square', pad=0.05))
            ax.plot([x_tag, center_x[pos]], [y_tag, center_y[pos]], c=colr, linewidth=0.4)

    # left top: 'Sclerosis' is pinned, the sensory/retina/olfactory tags are moved down
    draw_group(centers_left[centers_left.y > y_lim_top_left],
               x_lim[0], y_lim[1], y_lim_top_left, 'right',
               fixed_y={'Sclerosis': 0},
               shift_y={'Sensory': -15, 'Retina': -10, 'Olfactory': -5})

    # left bottom
    draw_group(centers_left[centers_left.y <= y_lim_bottom_left],
               x_lim[0], y_lim_bottom_left, y_lim[0], 'right')

    # right top: 'Alzheimer' is pinned
    draw_group(centers_right[centers_right.y > y_lim_top_right],
               x_lim[1], y_lim[1], y_lim_top_right, 'left',
               fixed_y={'Alzheimer': -25})

    # right bottom
    draw_group(centers_right[centers_right.y <= y_lim_bottom_right],
               x_lim[1], y_lim_bottom_right, y_lim[0], 'left')
            
            
def plot_ml_label_tags(tsne, colors, legend, x_lim, y_lim, abbrv= None, ax=None, middle_value = 0, subset = True, subset_size = 500000, rs = 42, fontsize=7, capitalize=True):
    """Plots label tags and a line pointing to the embedding.
    The line from a label tag points to the location with higher points density of that specific label. The labels can be changed for display in the plot with `abbrv`.
    
    Labels whose cluster center has x < `middle_value` are listed along the
    left x-limit, the others along the right x-limit, ordered by the y
    coordinate of their cluster center (highest first). Multi-word tags are
    drawn one word per line: every word reserves one slot of the vertical
    spacing, the first word is placed at its slot and the following words
    are stacked below it. The pointer line starts at the first word.
    
    Parameters
    ----------
    tsne: array-like of shape (n_points,2)
        t-SNE coordinates.
    colors : array-like of shape (n_points,)
        Color values for the colormap.
    legend : dict
        Legend label-color.
    x_lim : tuple (left, right)
        Limits of the x-axis.
    y_lim : tuple (bottom, top)
        Limits of the y-axis.
    abbrv : dict, default=None
        Dictionary with the abbreviations of the labels for the plot. If
        given, `capitalize` is ignored; labels without an entry are shown
        unchanged.
    ax : axes, optional
        Axes where to draw the figure. If ax=None, axes will be created. 
    middle_value : float, default=0
         The x value to decide which labels go to the left and which go to the right.
    subset : bool, default= True
         If True, a subset of the dataset is used for the cluster center calculations.
    subset_size : int, default=500000
        Size of the subset of the dataset used for the cluster center calculations.
    rs : int, default= 42
         Random seed.
    fontsize: int, default=7
         fontsize for the labels.
    capitalize : bool, default = True
        If True, it will capitalize the labels.
    
    See Also
    --------
    find_cluster_center
    
    """
    
    assert x_lim[0] < x_lim[1], "xlim values are in the wrong order"
    assert y_lim[0] < y_lim[1], "ylim values are in the wrong order"
    
    if ax is None:
        fig, ax = plt.subplots()

    # Tag background colors on which the text is written in light grey
    # instead of black.
    dark_colors = ('black', '#0000A6', '#5A0007', '#4A3B53', '#1B4400',
                   '#004D43', '#013349', '#000035', '#300018', '#001E09',
                   '#372101', '#6508ba')

    # vertical distance between the stacked words of one tag, per unit of fontsize
    scale_factor_labels = 4.2

    # calculate cluster centers
    center_cluster_coordinates = find_cluster_center(tsne, colors, legend, subset, subset_size, rs)

    def display_text(label):
        """Return the text shown for `label`."""
        if abbrv is not None:
            return abbrv.get(label, label)
        if capitalize:
            return label.capitalize()
        return label

    def draw_side(side_centers, x_tag, ha):
        """Draw the tags of one side at x=`x_tag` with their pointer lines."""
        side_centers = side_centers.sort_values(by='y', ascending=False)
        tag_words = [display_text(label).split() for label in side_centers.index]

        # one vertical slot per word, over all tags of this side
        n_slots = sum(len(words) for words in tag_words)
        y_slots = np.linspace(y_lim[1], y_lim[0], n_slots)

        # Plain arrays give purely positional access; the frame is indexed
        # by label, so integer lookups on its columns would be ambiguous.
        center_x = side_centers['x'].to_numpy()
        center_y = side_centers['y'].to_numpy()

        slot = 0
        for pos, label in enumerate(side_centers.index):
            colr = legend[label]
            text_color = 'lightgrey' if colr in dark_colors else 'black'

            for j, word in enumerate(tag_words[pos]):
                # following words hang below the slot of the first word
                ax.text(x_tag, y_slots[slot] - fontsize*j*scale_factor_labels, word,
                        c=text_color, fontsize=fontsize, ha=ha,
                        bbox=dict(facecolor=colr, edgecolor='None', alpha=0.8, boxstyle='square', pad=0.05))
                if j == 0:
                    ax.plot([x_tag, center_x[pos]], [y_slots[slot], center_y[pos]],
                            c=colr, linewidth=0.4, clip_on=False)

            slot += len(tag_words[pos])

    on_left = center_cluster_coordinates.x < middle_value

    # left tags are right-aligned against the left limit, and vice versa
    draw_side(center_cluster_coordinates[on_left], x_lim[0], 'right')
    draw_side(center_cluster_coordinates[~on_left], x_lim[1], 'left')

            
def plot_tsne_colors(tsne, colors, x_lim, y_lim, ax=None, plot_type=None, axis_on = False):
    """Plot t-SNE embedding with colors (by labels).

    Points whose color is ``'lightgrey'`` are drawn first, as a background
    layer; every other point is drawn on top of them using its own color.

    Parameters
    ----------
    tsne : array-like of shape (n_points, 2)
        t-SNE coordinates.
    colors : array-like of shape (n_points,)
        Color of each point. Entries equal to ``'lightgrey'`` mark background
        points. Lists are accepted; they are converted to NumPy arrays.
    x_lim : tuple (left, right)
        Limits of the x-axis.
    y_lim : tuple (bottom, top)
        Limits of the y-axis.
    ax : axes, optional
        Axes where to draw the figure. If ax=None, axes will be created.
    plot_type : {None, 'subplot_2', 'subplot_3', 'subplot_3_grey', 'subregion', 'test', 'pdf ML'}, default=None
        Style of the plot, modifies dotsize and alpha.
    axis_on : bool, default=False
        If True, axis is shown in plot.

    Raises
    ------
    AssertionError
        If the limits are not given in increasing order or `plot_type` is not
        one of the accepted values.

    """

    assert x_lim[0] < x_lim[1], "xlim values are in the wrong order."
    assert y_lim[0] < y_lim[1], "ylim values are in the wrong order."

    assert plot_type in [None, 'subplot_2', 'subplot_3', 'subplot_3_grey', 'subregion', 'test', 'pdf ML'], "Not valid `plot_type` value. Choose from [None, 'subplot_2', 'subplot_3', 'subplot_3_grey', 'subregion', 'test', 'pdf ML']."

    if ax is None:
        _, ax = plt.subplots()

    # Default dot sizes / transparencies, then per-style overrides.
    style = {'s_grey': 0.1, 's_color': 0.5, 'alpha_grey': 0.2, 'alpha_color': 0.2}
    overrides = {
        'subplot_2': {'s_grey': 0.2, 's_color': 0.2},
        'subplot_3': {'s_grey': 0.1, 's_color': 0.1},
        'subplot_3_grey': {'s_grey': 0.05, 'alpha_grey': 0.01, 's_color': 0.2, 'alpha_color': 0.5},
        'subregion': {'s_grey': 1, 's_color': 1, 'alpha_grey': 0.6, 'alpha_color': 0.7},
        'test': {'s_grey': 2, 's_color': 2, 'alpha_grey': 0.6, 'alpha_color': 0.7},
        'pdf ML': {'s_grey': 0.5, 'alpha_grey': 0.02, 's_color': 0.2, 'alpha_color': 0.5},
    }
    style.update(overrides.get(plot_type, {}))

    # Coerce to arrays: with a plain list, `colors == 'lightgrey'` would be a
    # single bool instead of an element-wise mask and the wrong points/colors
    # would be plotted without any error.
    tsne = np.asarray(tsne)
    colors = np.asarray(colors)
    is_grey = colors == 'lightgrey'
    is_colored = colors != 'lightgrey'

    # Background first, so that the colored points end up on top.
    ax.scatter(tsne[is_grey, 0], tsne[is_grey, 1],
               s=style['s_grey'], alpha=style['alpha_grey'], c='lightgrey',
               marker='.', linewidths=0, ec='None', rasterized=True)
    ax.scatter(tsne[is_colored, 0], tsne[is_colored, 1],
               s=style['s_color'], alpha=style['alpha_color'], c=colors[is_colored],
               marker='.', linewidths=0, ec='None', rasterized=True)

    ax.axis('scaled' if plot_type == 'subregion' else 'equal')

    ax.set_xlim(x_lim[0], x_lim[1])
    ax.set_ylim(y_lim[0], y_lim[1])
    if not axis_on:
        ax.axis('off')
        
        
def plot_tsne_years(tsne, colors, x_lim, y_lim, ax=None, fontsize=7, plot_type=None, colorbar=True, colorbar_type=None, axis_on=False, rs = 42):
    """Plot t-SNE embedding with colors (by years).

    The points are drawn in a random (but seeded) order, colored through the
    'plasma' colormap.

    Parameters
    ----------
    tsne : array-like of shape (n_points, 2)
        t-SNE coordinates.
    colors : array-like of shape (n_points,)
        Color values for the colormap.
    x_lim : tuple (left, right)
        Limits of the x-axis.
    y_lim : tuple (bottom, top)
        Limits of the y-axis.
    ax : axes, optional
        Axes where to draw the figure. If ax=None, axes will be created.
    fontsize : int, default=7
        Fontsize for the years in the colorbar.
    plot_type : {None, 'subplot', 'subregion', 'test'}, default=None
        Style of the plot, modifies dotsize and alpha.
    colorbar : bool, default=True
        If True, colorbar will be plotted. Its two ticks are labelled
        '1970' and '2021'.
    colorbar_type : {None, 'neuroscience'}, default=None
        Style of the colorbar (changes its placement).
    axis_on : bool, default=False
        If True, axis is shown in plot.
    rs : int, default= 42
         Random seed for the reordering of points.

    Raises
    ------
    AssertionError
        If the limits are not given in increasing order, or `plot_type` /
        `colorbar_type` is not one of the accepted values.

    Notes
    -----
    The shuffling uses a private ``RandomState(rs)``, so calling this function
    does not reseed NumPy's global random number generator. The permutation is
    the same one ``np.random.seed(rs); np.random.permutation(n)`` produces.

    """

    assert x_lim[0] < x_lim[1], "xlim values are in the wrong order."
    assert y_lim[0] < y_lim[1], "ylim values are in the wrong order."

    assert plot_type in [None, 'subplot', 'subregion', 'test'], "Not valid `plot_type` value. Choose from [None, 'subplot', 'subregion', 'test']."
    assert colorbar_type in [None, 'neuroscience'], "Not valid `colorbar_type` value. Choose from [None, 'neuroscience']."

    if ax is None:
        _, ax = plt.subplots()

    # (dot size, alpha) for every accepted style.
    marker_styles = {
        None: (0.5, 0.2),
        'subplot': (0.2, 0.2),
        'subregion': (0.5, 0.7),
        'test': (2, 0.7),
    }
    s_color, alpha_color = marker_styles.get(plot_type, marker_styles[None])

    # Arrays are needed for the fancy indexing below (lists would fail).
    tsne = np.asarray(tsne)
    colors = np.asarray(colors)

    # Shuffle the drawing order so that no year systematically hides another.
    reorder = np.random.RandomState(rs).permutation(tsne.shape[0])
    shuffled = tsne[reorder]
    ax.scatter(shuffled[:, 0], shuffled[:, 1], s=s_color, c=colors[reorder], cmap='plasma',
               alpha=alpha_color, marker='.', linewidths=0, rasterized=True)

    ax.axis('scaled' if plot_type == 'subregion' else 'equal')

    ax.set_xlim(x_lim[0], x_lim[1])
    ax.set_ylim(y_lim[0], y_lim[1])
    if not axis_on:
        ax.axis('off')

    if colorbar:
        # The colorbar is attached to an empty scatter so that it is drawn
        # fully opaque, independently of the alpha used for the points.
        heatmap = ax.scatter([], [], c=[], cmap='plasma')
        if colorbar_type == 'neuroscience':
            cbar = plt.colorbar(heatmap, ax=ax, shrink=0.1, location='left', anchor=(0, 0), panchor=(0, 0), pad=-.3, aspect=10)
        else:
            # anchor second coordinate controls y-position and pad controls x-position
            cbar = plt.colorbar(heatmap, ax=ax, shrink=0.1, anchor=(0.5, 0), panchor=(0, 0.5), pad=-.13, aspect=10)

        cbar.set_alpha(1)
        cbar.ax.get_yaxis().set_ticks([0, 1])
        cbar.ax.get_yaxis().set_ticklabels(['1970', '2021'])
        cbar.ax.tick_params(labelsize=fontsize)
        
        
def plot_tsne_genders(tsne, colors, x_lim, y_lim, ax=None, plot_type=None, legend=True, axis_on = False, rs = 42):
    """Plot t-SNE embedding with colors (by genders).

    Three layers are drawn, in this order: points colored ``'lightgrey'``,
    points colored ``'black'``, and finally the points colored ``'tab:blue'``
    or ``'tab:orange'`` (shuffled among themselves). Points with any other
    color are not drawn.

    Parameters
    ----------
    tsne : array-like of shape (n_points, 2)
        t-SNE coordinates.
    colors : array-like of shape (n_points,)
        Color of each point. According to the legend drawn by this function:
        'tab:orange' = female, 'tab:blue' = male, 'black' = unknown gender,
        'lightgrey' = unknown name.
    x_lim : tuple (left, right)
        Limits of the x-axis.
    y_lim : tuple (bottom, top)
        Limits of the y-axis.
    ax : axes, optional
        Axes where to draw the figure. If ax=None, axes will be created.
    plot_type : {None, 'subplot_2', 'subplot_3', 'subregion', 'test', 'zoom'}, default=None
        Style of the plot, modifies dotsize and alpha.
    legend : bool, default=True
        If True, legend is shown.
    axis_on : bool, default=False
        If True, axis is shown in plot.
    rs : int, default= 42
         Random seed for the reordering of points.

    Raises
    ------
    AssertionError
        If the limits are not given in increasing order or `plot_type` is not
        one of the accepted values.

    Notes
    -----
    The shuffling uses a private ``RandomState(rs)``, so calling this function
    does not reseed NumPy's global random number generator. The permutation is
    the same one ``np.random.seed(rs); np.random.permutation(n)`` produces.

    """

    assert x_lim[0] < x_lim[1], "xlim values are in the wrong order"
    assert y_lim[0] < y_lim[1], "ylim values are in the wrong order"

    assert plot_type in [None, 'subplot_2', 'subplot_3', 'subregion', 'test', 'zoom'], "Not valid `plot_type` value. Choose from [None, 'subplot_2', 'subplot_3', 'subregion', 'test', 'zoom']."

    if ax is None:
        _, ax = plt.subplots()

    # (dot size, alpha) for every accepted style.
    marker_styles = {
        None: (0.5, 0.2),
        'subplot_2': (0.2, 0.2),
        'subplot_3': (0.1, 0.2),
        'subregion': (2, 0.7),
        'test': (10, 1),
        'zoom': (3, 0.7),
    }
    s, alpha = marker_styles.get(plot_type, marker_styles[None])

    # Arrays are needed for the fancy indexing below (lists would fail).
    tsne = np.asarray(tsne)
    colors = np.asarray(colors)

    # Shuffle so that, within the blue/orange layer, neither color
    # systematically covers the other.
    reorder = np.random.RandomState(rs).permutation(tsne.shape[0])
    tsne_reordered = tsne[reorder]
    colors_reordered = colors[reorder]

    # Single-colored background layers.
    for flat_color in ('lightgrey', 'black'):
        layer = colors_reordered == flat_color
        ax.scatter(tsne_reordered[layer, 0], tsne_reordered[layer, 1],
                   s=s, c=flat_color, alpha=alpha, marker='.', linewidths=0, rasterized=True)

    # Authors with a predicted gender, drawn on top.
    mask_pred_authors = (colors_reordered == 'tab:blue') | (colors_reordered == 'tab:orange')
    ax.scatter(tsne_reordered[mask_pred_authors, 0], tsne_reordered[mask_pred_authors, 1],
               s=s, c=colors_reordered[mask_pred_authors], alpha=alpha, marker='.', linewidths=0, rasterized=True)

    if legend:
        # Empty scatters act as opaque, fixed-size legend handles.
        point1 = ax.scatter([], [], c='tab:orange', s=10, alpha=1, label='female')
        point2 = ax.scatter([], [], c='tab:blue', s=10, alpha=1, label='male')
        point3 = ax.scatter([], [], c='black', s=10, alpha=1, label='unknown gender')
        point4 = ax.scatter([], [], c='lightgrey', s=10, alpha=1, label='unknown name')

        ax.legend(handles=[point2, point1, point3, point4], loc='lower left', fontsize=5, frameon=False,
                  borderpad=0.2, handletextpad=0, handlelength=1, borderaxespad=1.5)

    ax.axis('equal')
    ax.set_xlim(x_lim[0], x_lim[1])
    ax.set_ylim(y_lim[0], y_lim[1])
    if not axis_on:
        ax.axis('off')
        
        
def plot_tsne_word(all_abstracts, word, tsne, x_lim, y_lim, ax=None, plot_type=None, title_on=False, axis_on = False, legend_on = False, verbose=True):
    """Plots t-SNE embedding with points having one given word in their abstract highlighted.
    It plots all points in grey, and papers that have that specific word/phrase in their abstract in black.
    If more than one word is given, each of them will be plotted using colors from tab10 color palette
    (the palette is cycled if more than ten words are given).
    Take into account that if this happens, points will be plotted on top of each other instead of shuffled, so the amount of papers may be misleading.

    NOTE(review): the original author flagged the list-of-words mode as not
    working. The cause is not visible from this function alone; verify the
    output before relying on that mode.

    Parameters
    ----------
    all_abstracts : pandas dataframe of str
        All texts (in this case abstracts).
    word : str or list of str
        Word/phrase or list with many words/phrases to be queried.
    tsne: array-like
        t-SNE coordinates.
    x_lim : tuple (left, right)
        Limits of the x-axis.
    y_lim : tuple (bottom, top)
        Limits of the y-axis.
    ax : axes, optional
        Axes where to draw the figure. If ax=None, axes will be created.
    plot_type : {None, 'subplot_2', 'subplot_3', 'subplot_3_grey', 'subregion', 'test'}, default=None
        Style of the plot, modifies dotsize and alpha.
    title_on : bool, default=False
        If True, adds the word being queried as title to the figure.
        Only used when `word` is a single str.
    axis_on : bool, default=False
        If True, axis is shown in plot.
    legend_on : bool, default=False
        If True, a legend with one entry per queried word is added.
        Only used when `word` is a list.
    verbose : bool, default=True
        If True, prints the number of papers with that certain word and its variations in it .

    Raises
    ------
    AssertionError
        If the limits are not given in increasing order or `plot_type` is not
        one of the accepted values.
    TypeError
        If `word` is neither a str nor a list/tuple of str.

    See Also
    --------
    exploration.find_mask_words

    """

    assert x_lim[0] < x_lim[1], "xlim values are in the wrong order"
    assert y_lim[0] < y_lim[1], "ylim values are in the wrong order"

    assert plot_type in [None, 'subplot_2', 'subplot_3', 'subplot_3_grey', 'subregion', 'test'], "Not valid `plot_type` value. Choose from [None, 'subplot_2', 'subplot_3', 'subplot_3_grey', 'subregion', 'test']."

    # Reject unsupported types up front: previously they fell through both
    # branches and produced an empty plot without any warning.
    single_word = isinstance(word, str)
    if not single_word and not isinstance(word, (list, tuple)):
        raise TypeError("`word` must be a str or a list of str, got " + type(word).__name__ + ".")

    # Default dot sizes / transparencies, then per-style overrides.
    style = {'s_grey': 0.1, 's_color': 0.5, 'alpha_grey': 0.2, 'alpha_color': 0.5}
    overrides = {
        'subplot_2': {'s_grey': 0.2, 's_color': 0.2},
        'subplot_3': {'s_grey': 0.1, 's_color': 0.1},
        'subplot_3_grey': {'s_grey': 0.05, 'alpha_grey': 0.01, 's_color': 0.2, 'alpha_color': 0.2},
        'subregion': {'s_grey': 1, 's_color': 1, 'alpha_grey': 0.6, 'alpha_color': 0.7},
        'test': {'s_grey': 2, 's_color': 2, 'alpha_grey': 0.6, 'alpha_color': 0.7},
    }
    style.update(overrides.get(plot_type, {}))

    if ax is None:
        _, ax = plt.subplots()

    # Background layer with every paper, common to both modes.
    ax.scatter(tsne[:,0], tsne[:,1], c='lightgrey', s=style['s_grey'], alpha=style['alpha_grey'],
               linewidths=0, rasterized=True)

    if single_word:
        mask = find_mask_words(all_abstracts, word, verbose=verbose)

        subregion = tsne[mask]
        ax.scatter(subregion[:,0], subregion[:,1], s=style['s_color'], c='black', alpha=style['alpha_color'],
                   marker='.', linewidths=0, rasterized=True)

        if title_on:
            # Text inside the axes instead of ax.set_title.
            ax.text(0.5, 1, '"'+word+'"', transform=ax.transAxes, va='top', ha='center')

    else:
        n_palette = plt.cm.tab10.N
        for i, elem in enumerate(word):
            mask = find_mask_words(all_abstracts, elem, verbose=verbose)

            if verbose:
                print('---------------')

            # A single-row list marks one RGBA color shared by all points
            # (instead of four values to be color-mapped). The index is
            # wrapped so that words beyond the palette size do not all get
            # the last palette color.
            rgba = [plt.cm.tab10(i % n_palette)]

            subregion = tsne[mask]
            ax.scatter(subregion[:,0], subregion[:,1], c=rgba, s=style['s_color'], alpha=style['alpha_color'],
                       marker='.', linewidths=0, rasterized=True)
            # Empty scatter used only as an opaque legend handle.
            ax.scatter([], [], c=rgba, s=10, alpha=1, label='"'+elem+'"')

        if legend_on:
            ax.legend()

    ax.axis('equal')
    ax.set_xlim(x_lim[0], x_lim[1])
    ax.set_ylim(y_lim[0], y_lim[1])

    if not axis_on:
        ax.axis('off')
        
        
def plot_tsne_zoom(tsne, mask, x_lim, y_lim, ax=None, plot_type=None, title_on=False, axis_on=False, verbose=True):
    """Plots faster zoomed regions of t-SNE embedding.
    Only the points lying strictly inside the requested limits are drawn, all of them in grey.
    One can pass an additional mask and the papers from the mask will be drawn in black on top.

    Parameters
    ----------
    tsne: array-like
        t-SNE coordinates.
    mask : array of bool or None
        If given, colors points from that mask in black on top of the grey points.
    x_lim : tuple (left, right)
        Limits of the x-axis.
    y_lim : tuple (bottom, top)
        Limits of the y-axis.
    ax : axes, optional
        Axes where to draw the figure. If ax=None, axes will be created.
    plot_type : {None, 'zoom x2'}, default=None
        Style of the plot, modifies dotsize.
    title_on : bool, default=False
        Currently unused by this function.
    axis_on : bool, default=False
        If True, axis and grid are shown in plot.
    verbose : bool, default=True
        Currently unused by this function.
    """

    assert x_lim[0] < x_lim[1], "xlim values are in the wrong order"
    assert y_lim[0] < y_lim[1], "ylim values are in the wrong order"

    assert plot_type in [None, "zoom x2"], "Not valid `plot_type` value. Choose from [None, 'zoom x2']."

    # Both layers share the dot size; only the transparency differs.
    dot_size = 5 if plot_type == "zoom x2" else 3
    grey_alpha = 0.5
    black_alpha = 0.7

    if ax is None:
        fig, ax = plt.subplots()

    # Restrict the drawing to the visible window (strict inequalities).
    xs = tsne[:, 0]
    ys = tsne[:, 1]
    in_window = (xs < x_lim[1]) & (xs > x_lim[0]) & (ys < y_lim[1]) & (ys > y_lim[0])

    # Keyword arguments common to the two scatter layers.
    shared = {"marker": ".", "ec": "None", "rasterized": True}

    ax.scatter(tsne[in_window, 0], tsne[in_window, 1],
               s=dot_size, c="lightgrey", alpha=grey_alpha, **shared)

    if mask is not None:
        highlighted = in_window & mask
        ax.scatter(tsne[highlighted, 0], tsne[highlighted, 1],
                   s=dot_size, c="black", alpha=black_alpha, **shared)

    ax.axis("equal")
    ax.set_xlim(x_lim[0], x_lim[1])
    ax.set_ylim(y_lim[0], y_lim[1])

    if axis_on == False:
        ax.axis("off")
    else:
        ax.grid()