#!/usr/bin/env python3

import datetime as dt
import itertools as it
import numpy as np
import matplotlib.pylab as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import seaborn as sns
import os


def pubs_over_time(pub_date_data_list,
                   color_list=None,
                   label_list=None,
                   legend_title=None,
                   year_range=(2000, 2019),
                   hist_type='barstacked',
                   output_fname=None,
                   required_date=None,
                   encouraged_date=None):
    """Plot histogram of publications over time for groups of articles.

    Parameters
    ----------
    pub_date_data_list : list of array-like of dates
        One collection of publication dates per group of articles to be
        counted (for example one pandas date column per group). The list is
        passed unchanged to ``Axes.hist``.
    color_list : list of str, optional
        One color for each group in `pub_date_data_list`, by default None.
    label_list : list of str, optional
        One label for each group in `pub_date_data_list`, by default None.
        A legend is only drawn when labels are given.
    legend_title : str, optional
        Title for legend, by default None
    year_range : tuple of two ints, optional
        Minimum and maximum years to be plotted on x axis,
        by default (2000, 2019). The histogram has one bin per year, with
        bin edges on 1 January of every year from the minimum to the
        maximum (inclusive).
    hist_type : {'bar', 'barstacked', 'step'  or 'stepfilled'}, optional
        Histogram style, by default 'barstacked'.
    output_fname : str, optional
        Filename to save output file as .png image. Passing this argument will
        make a new directory if necessary and will overwrite a file that
        already exists there without warning, by default None (no figure
        saved). A bare file name without a directory part is saved in the
        current working directory.
    required_date : datetime, optional
        Date at which to add a solid vertical line to indicate when the journal
        or publisher required a data availability statement to be included in
        the submitted article, by default None (no line added).
    encouraged_date : datetime, optional
        Date at which to add a dashed vertical line to indicate when the
        journal or publisher encouraged a data availability statement to be
        included in the submitted article, by default None (no line added).

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure that was created.
    ax : matplotlib.axes.Axes
        The axes holding the histogram.
    """

    # Bin edges: 1 January of every year in the (inclusive) year range
    year_list = np.arange(year_range[0], year_range[1]+1)
    bins_list = mdates.date2num([dt.date(year, 1, 1) for year in year_list])

    # Ticks sit mid-year so they are centred on their bin. The last year is
    # only the closing bin edge, so it gets no tick. Label every year for
    # short ranges and every third year otherwise to avoid crowding.
    tick_step = 3 if len(year_list) > 8 else 1
    xticks_label_list = year_list[:-1:tick_step]
    # Always convert to matplotlib date numbers so both tick densities place
    # their ticks in the same coordinates as the bin edges
    xticks_list = mdates.date2num([dt.date(year, 6, 15)
                                   for year in xticks_label_list])

    # Create histogram across time
    fig, ax = plt.subplots(figsize=(10, 6))

    # Make the stacked histogram
    ax.hist(pub_date_data_list,
            bins=bins_list,
            histtype=hist_type,
            color=color_list,
            label=label_list)

    # Add the required (solid) and encouraged (dashed) date lines
    # (if applicable)
    for mandate_date, line_style in ((required_date, 'solid'),
                                     (encouraged_date, 'dashed')):
        if mandate_date:
            ax.axvline(mdates.date2num(mandate_date),
                       color='k',
                       linestyle=line_style,
                       linewidth=3)

    # Add the legend
    if label_list:
        legend = ax.legend()
        legend.set_title(legend_title, prop={'size': 14})
        # NOTE: relies on a private matplotlib attribute to left-align the
        # legend title with the entries
        legend._legend_box.align = "left"

    # Adjust the plot to make it pretty
    ax.set_xlabel('Publication date')
    ax.set_xticks(xticks_list)
    ax.set_xticklabels(xticks_label_list)
    ax.set_ylabel('Number of articles')
    ax.yaxis.set_major_locator(ticker.MaxNLocator(5))
    sns.despine(ax=ax)

    # Tight layout to look really pretty
    fig.tight_layout()

    # Save the figure
    if output_fname:
        output_dir = os.path.dirname(output_fname)
        # dirname is '' for a bare file name; os.makedirs('') would raise,
        # and there is nothing to create in that case anyway
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        fig.savefig(output_fname, dpi=300, bbox_inches='tight')

    return fig, ax


def _is_missing_date(value):
    """Return True if `value` is None, NaT or otherwise NaN-like."""
    if value is None:
        return True
    try:
        return bool(np.isnat(value))
    except TypeError:
        # np.isnat only accepts numpy datetime64/timedelta64 values. For any
        # other type fall back to the "NaN-like values are not equal to
        # themselves" test, which is False for ordinary values.
        return bool(value != value)


def _lookup_policy_date(df_policies, publisher, journal, column):
    """Return the first `column` date for a journal, or None if unknown.

    The journal specific row is tried first; only when there is no such row
    is the publisher wide row (journal 'all') used instead.
    """
    group_mask = df_policies['Group'] == publisher

    for journal_key in (journal, 'all'):
        matches = df_policies.loc[group_mask &
                                  (df_policies['Journal'] == journal_key),
                                  column].values
        if len(matches) > 0:
            policy_date = matches[0]
            # A row that exists but has no date means "no policy": do not
            # fall through to the publisher wide row
            return None if _is_missing_date(policy_date) else policy_date

    return None


def get_mandate_dates(df_policies, publisher='All', journal='All'):
    """Get the required and encouraged dates at which the specific publisher
    or journal mandated data availability statements appear in the submitted
    articles.

    The publisher and journal names are lowercased before being compared
    with the 'Group' and 'Journal' columns. If there is no row for the
    specific journal, the publisher wide row (journal 'all') is used.

    Parameters
    ----------
    df_policies : pandas dataframe
        Known policy dates for journals and publishers. Must contain the
        columns 'Group', 'Journal', 'Required' and 'Encouraged'.
    publisher : str
        Publisher name, by default "All"
    journal : str
        Journal name, by default "All"

    Returns
    -------
    required_date : date or None
        First matching value from the 'Required' column, or None if there
        is no matching row or the stored date is missing (NaT).
    encouraged_date : date or None
        First matching value from the 'Encouraged' column, or None if there
        is no matching row or the stored date is missing (NaT).
    """
    # Make journal and publisher lowercase to make it easier to search
    publisher = publisher.lower()
    journal = journal.lower()

    required_date = _lookup_policy_date(df_policies,
                                        publisher,
                                        journal,
                                        'Required')
    encouraged_date = _lookup_policy_date(df_policies,
                                          publisher,
                                          journal,
                                          'Encouraged')

    return required_date, encouraged_date


def make_lots_of_plots(df,
                       publisher_journal_dict,
                       palette_extended,
                       df_policies):
    """Make and save publication histograms for every article selection.

    For each entry in `publisher_journal_dict` and each of two year ranges
    (2000 to 2019 and 2012 to 2019), the articles are split into four
    groups (no DAS and DAS classes 1, 2 and 3). Four figures are made per
    combination: 'bar' and 'barstacked' histograms, each with and without
    the policy date lines. Every figure is saved under
    ``../figures/<years>/<date line>/<hist type>/`` and only the
    'barstacked' figure with date lines is shown on screen.

    Parameters
    ----------
    df : pandas dataframe
        Articles to plot. The columns 'das_class', 'has_das' and 'p_date'
        are read.
    publisher_journal_dict : dict
        Maps a label for the article selection (used as legend title and in
        the output file name) to a tuple of four values: publisher name,
        journal name, mask selecting the articles in `df`, and colour
        counter.
    palette_extended : sequence of colours
        Colours to pick from. The entries at positions
        ``colour counter * 6 + 2`` to ``colour counter * 6 + 5`` are used
        for the four groups.
    df_policies : pandas dataframe
        Policy dates, passed on to `get_mandate_dates`.
    """
    # One mask per DAS class, followed by the mask for articles without DAS
    class_masks = [df['das_class'] == das_class for das_class in (1, 2, 3)]
    no_das_mask = df['has_das'] == False  # noqa: E712

    # Groups in plotting order, paired up with their legend labels
    group_masks = [no_das_mask] + class_masks
    group_labels = ('no DAS',
                    'Upon request',
                    'In paper & SI',
                    'In repository')

    # The two year ranges to plot, keyed by their output directory name
    year_ranges = {'Dates_2000to2019': (2000, 2019),
                   'Dates_2012to2019': (2012, 2019)}

    for selection_label, selection in publisher_journal_dict.items():
        (publisher_name,
         journal_name,
         selection_mask,
         color_counter) = selection

        for year_str, year_range in year_ranges.items():

            # Publication dates of the selected articles, one entry per group
            pub_date_data = [df.loc[selection_mask & group_mask, 'p_date']
                             for group_mask in group_masks]

            # Four consecutive colours from this selection's palette section
            palette_start = color_counter * 6
            color_list = [palette_extended[palette_start + offset]
                          for offset in (2, 3, 4, 5)]

            # Policy dates for this publisher and journal
            mandate_dates = get_mandate_dates(df_policies,
                                              publisher=publisher_name,
                                              journal=journal_name)
            policy_required, policy_encouraged = mandate_dates

            # One version without and one with the policy date lines
            date_line_options = {'NoDateLine': (None, None),
                                 'DateLine': (policy_required,
                                              policy_encouraged)}

            # One regular and one stacked bar histogram of each version
            for hist_type in ('bar', 'barstacked'):
                for date_line_str, line_dates in date_line_options.items():
                    line_required, line_encouraged = line_dates

                    safe_label = selection_label.replace(" ", "_")
                    image_name = 'PubsOverTime_{}_ByDas.png'.format(safe_label)
                    output_fname = os.path.join('..',
                                                'figures',
                                                year_str,
                                                date_line_str,
                                                hist_type,
                                                image_name)

                    # Make the figure
                    fig, ax = pubs_over_time(pub_date_data,
                                             color_list=color_list,
                                             label_list=list(group_labels),
                                             legend_title=selection_label,
                                             year_range=year_range,
                                             hist_type=hist_type,
                                             output_fname=output_fname,
                                             required_date=line_required,
                                             encouraged_date=line_encouraged)

                    # Only show the stacked version with the date lines
                    is_stacked = hist_type == 'barstacked'
                    if is_stacked and date_line_str == 'DateLine':
                        plt.show()

                    plt.close()