#!/bin/bash
__doc__="

Requirements:

    * The SMART WATCH Python repo must be installed in the current python virtualenv

    * The SMART WATCH DVC Repo should be checked out and in a known location

    * The Drop2-Aligned-TA1-2022-01 dataset should be DVC-pulled so it is on-disk

"
#export CUDA_VISIBLE_DEVICES="1"

# Resolve the root of the DVC repo with the geowatch_dvc helper. If the helper
# is missing, fails, or prints nothing, fall back to the machine-specific
# checkout below (and say so on stderr) so that the paths derived from
# DVC_DPATH are never silently rooted at "/".
if ! DVC_DPATH=$(geowatch_dvc) || [ -z "$DVC_DPATH" ]; then
    DVC_DPATH=/localdisk0/SCRATCH/watch/ben/smart_watch_dvc
    echo "warning: geowatch_dvc did not return a path; using DVC_DPATH=$DVC_DPATH" >&2
fi
# Per-host, per-user directory; used further down as the base of the run directory.
WORKDIR=$DVC_DPATH/training/$HOSTNAME/$USER

# Dataset bundle to train on, given relative to the DVC root (latest version).
DATASET_CODE="uky_invariants/features_22_03_14"
KWCOCO_BUNDLE_DPATH="${DVC_DPATH}/${DATASET_CODE}"

### TODO: point these at the kwcoco files that contain the team features.
# NOTE: the test split currently points at the same file as the validation split.
TRAIN_FPATH="${KWCOCO_BUNDLE_DPATH}/data_train.kwcoco.json"
VALI_FPATH="${KWCOCO_BUNDLE_DPATH}/data_vali.kwcoco.json"
TEST_FPATH="${KWCOCO_BUNDLE_DPATH}/data_vali.kwcoco.json"

### TODO: change the input channels given to the network,
### e.g. "blue|green|red|nir|swir16|swir22,myfeat.0:8"
CHANNELS='blue|green|red|nir|swir16|swir22,invariants.0:7'

# Initial weights. "noop" trains from scratch; an existing model path instead
# transfers its weights (as best as possible) through torch-liberator partial
# weight loading.
INITIAL_STATE='noop'
#INITIAL_STATE="$DVC_DPATH/models/fusion/SC-20201117/BOTH_smt_it_stm_p8_L1_DIL_v55/BOTH_smt_it_stm_p8_L1_DIL_v55_epoch=5-step=53819.pt"

### TODO: give every experiment its own unique name.
EXPERIMENT_NAME="features_late_fusion_V001"

debug_notes(){
    # Report statistics for the validation and training kwcoco files, first
    # with the geowatch "stats" command and then with the kwcoco one.
    local stats_module
    for stats_module in geowatch kwcoco; do
        python -m "$stats_module" stats "$VALI_FPATH" "$TRAIN_FPATH"
    done
}

# Directory for this run, handed to the trainer as --default_root_dir.
DEFAULT_ROOT_DIR=$WORKDIR/$DATASET_CODE/runs/$EXPERIMENT_NAME

__hyperparam_notes__='

The following hyperparams are reasonable defaults

Key hyperparams to pay attention to are:

* global_class_weight - when non-zero enables the SC classification head
* global_saliency_weight - when non-zero enables the BAS saliency head
* chip_size - pixel size of the spatial input window
* time_steps - number of frames to use
* time_sampling - strategy for temporal sampling. See --help for other options.
* tokenizer - method for breaking up the input data-cube into tokens
* normalize_inputs - number of dataset iterations to use to estimate mean/std for network normalization

See `python -m geowatch.tasks.fusion.fit --help` for details on each
hyperparameter.  Note, some parameters exposed in this help no longer work or
are not hooked up. Email Jon C if you have any questions.
'

# Arguments for the fusion trainer, kept in an array so that every expansion is
# quoted (a name or path containing whitespace stays a single argument) and so
# that the groups can be commented. The order matches the original command line.
FIT_ARGS=(
    # Run directory, run name, datasets and input channels
    --default_root_dir="$DEFAULT_ROOT_DIR"
    --name="$EXPERIMENT_NAME"
    --train_dataset="$TRAIN_FPATH"
    --vali_dataset="$VALI_FPATH"
    --test_dataset="$TEST_FPATH"
    --channels="$CHANNELS"

    # Head weights and losses (see __hyperparam_notes__)
    --global_class_weight=1.00
    --global_saliency_weight=1.00
    --neg_to_pos_ratio=0.25
    --saliency_loss='dicefocal'
    --class_loss='dicefocal'

    # Workers, devices and batching
    --num_workers=8
    --gpus "1"
    --batch_size=1
    --accumulate_grad_batches=1

    # Optimization and regularization
    --learning_rate=1e-4
    --weight_decay=1e-5
    --dropout=0.1
    --attention_impl=exact

    # Spatial / temporal sampling and tokenization (see __hyperparam_notes__)
    --chip_size=380
    --time_steps=5
    --chip_overlap=0.0
    --time_sampling=soft+distribute
    --time_span=7m
    --tokenizer=linconv

    # Optimizer, model and input normalization
    --optimizer=AdamW
    --method="MultimodalTransformer"
    --arch_name=smt_it_stm_p8
    --normalize_inputs=1024

    # Training length and drawing
    --max_epochs=40
    --patience=40
    --max_epoch_length=none
    --draw_interval=5000m
    --num_draw=1

    # Mixed-precision backend and initial weights
    --amp_backend=apex
    --init="$INITIAL_STATE"
)

python -m geowatch.tasks.fusion.fit "${FIT_ARGS[@]}"


gather_checkpoint_notes(){
    __doc__="
    Every so often, I run the repackage command and gather the packaged
    checkpoints for evaluation.
    "

    # Repackage the raw lightning checkpoints. This only works for the current
    # fusion model; ideally the fit command would do this itself.
    # The pattern is quoted, so it reaches the command as a literal string
    # (presumably expanded by repackage itself - confirm).
    python -m geowatch.tasks.fusion.repackage repackage \
        "${DEFAULT_ROOT_DIR}/lightning_logs/version_*/checkpoints/*.ckpt"

    # Keep the experiment results by copying the packaged models into the DVC
    # directory. Both variables are left as globals.
    BASE_SAVE_DPATH="${DVC_DPATH}/models/fusion/baseline"
    EXPT_SAVE_DPATH="${BASE_SAVE_DPATH}/${EXPERIMENT_NAME}"
    mkdir -p "${BASE_SAVE_DPATH}" "${EXPT_SAVE_DPATH}"

    # Here the shell expands the glob: every packaged *.pt file is copied.
    cp "${DEFAULT_ROOT_DIR}"/lightning_logs/version_*/checkpoints/*.pt "${EXPT_SAVE_DPATH}"
}


predict_and_evaluate_checkpoints(){
    __doc__='
    Given the checkpoint candidates, we can "schedule" them for evaluation.
    This schedule evaluations script is a work in progress.
    There are two ways of using it:

    1. If run=0, all it does is build the appropriate bash commands to run 
       prediction and evaluation

    2. If run=1, it will launch the jobs via the hacky tmux-queue, which really
       should be a slurm queue. The number of jobs will depend on the setting
       of "--gpus". E.g. specify the index of the gpus to use --gpus="0,1,2,3"

    Note:
        This whole queueing system is a work in progress and if anyone knows
        any good libraries for Python that let you submit a bash job, specify
        how many concurrent jobs can be running at the same time, and allow
        jobs to depend on other jobs, let me know.  If this doesnt exist I want
        to make it with multiprocessing, tmux, and slurm backends.
    '
    # Directory holding the packaged *.pt models. EXPT_SAVE_DPATH is only set as
    # a side effect of gather_checkpoint_notes; when that has not run in this
    # shell, derive the same location here so the pattern does not degrade to
    # "/*.pt".
    local model_dpath=${EXPT_SAVE_DPATH:-$DVC_DPATH/models/fusion/baseline/$EXPERIMENT_NAME}

    # The model pattern is quoted on purpose: it is passed as a glob string.
    # Evaluation runs against the validation dataset.
    python -m geowatch.tasks.fusion.schedule_evaluation schedule_evaluation \
            --gpus="0," \
            --model_globstr="$model_dpath/*.pt" \
            --test_dataset="$VALI_FPATH" \
            --run=0 --skip_existing=True
}


aggregate_multiple_evaluations(){
    __doc__="
    This script will aggregate results over all packaged checkpoints with
    computed metrics. You can run this while the schedule_evaluation script is
    running. It will dump aggregate stats into the 'out_dpath' folder.
    "

    # Resolve the DVC root locally so the caller's DVC_DPATH is not clobbered.
    # If geowatch_dvc is missing, fails, or prints nothing, fall back to the
    # conventional checkout under $HOME and say so on stderr.
    local dvc_dpath
    if ! dvc_dpath=$(geowatch_dvc) || [ -z "$dvc_dpath" ]; then
        dvc_dpath=$HOME/data/dvc-repos/smart_watch_dvc
        echo "warning: geowatch_dvc did not return a path; using $dvc_dpath" >&2
    fi

    # Aggregate this experiment: the packaged checkpoints are saved under
    # models/fusion/baseline/$EXPERIMENT_NAME, so the experiment component of
    # the pattern must be that same name. The epoch and prediction-dataset
    # components match everything.
    local expt_name_pat=$EXPERIMENT_NAME
    local model_epoch_pat="*"
    local pred_dset_pat="*"
    local measure_globstr=$dvc_dpath/models/fusion/baseline/${expt_name_pat}/${model_epoch_pat}/${pred_dset_pat}/eval/curves/measures2.json

    # NOTE(review): the dset_group_key names a Drop2 bundle while this script
    # trains on "$DATASET_CODE" - confirm that the key is still the right one.
    python -m geowatch.tasks.fusion.aggregate_results \
        --measure_globstr="$measure_globstr" \
        --out_dpath="$dvc_dpath/agg_results/baseline" \
        --dset_group_key="Drop2-Aligned-TA1-2022-02-15_data_vali.kwcoco"

}