#!/usr/bin/env python3
# PYTHON_ARGCOMPLETE_OK
"""
The following example simply produces the script under different variations.

CommandLine:
    xdoctest -m geowatch.cli.queue_cli.prepare_teamfeats __doc__

SeeAlso:
    ../tasks/invariants/predict.py
    ../tasks/landcover/predict.py
    ../tasks/depth/predict.py
    ../tasks/cold/predict.py

    ~/code/watch/dev/poc/prepare_time_combined_dataset.py

Example:
    >>> from geowatch.cli.queue_cli.prepare_teamfeats import *  # NOQA
    >>> config = {
    >>>     'src_kwcocos': './pretend_bundle/data.kwcoco.json',
    >>>     'gres': [0, 1],
    >>>     'expt_dvc_dpath': './pretend_expt_dvc',
    >>>     #
    >>>     'virtualenv_cmd': 'conda activate geowatch',
    >>>     #
    >>>     #'with_s2_landcover': 1,
    >>>     #'with_materials': 1,
    >>>     #'with_invariants2': 1,
    >>>     'with_mae': 1,
    >>>     #
    >>>     'run': 0,
    >>>     'check': False,
    >>>     'skip_existing': False,
    >>>     'backend': 'serial',
    >>> }
    >>> config['backend'] = 'slurm'
    >>> outputs = prep_feats(cmdline=False, **config)
    >>> outputs['queue'].print_commands(0, 0)
    >>> config['backend'] = 'tmux'
    >>> outputs = prep_feats(cmdline=False, **config)
    >>> outputs['queue'].print_commands(0, 0)
    >>> config['backend'] = 'serial'
    >>> outputs = prep_feats(cmdline=False, **config)
    >>> outputs['queue'].print_commands(0, 0)

Example:
    >>> # Test landcover commands
    >>> from geowatch.cli.queue_cli.prepare_teamfeats import *  # NOQA
    >>> config = {
    >>>     'src_kwcocos': './PRETEND_BUNDLE/data.kwcoco.json',
    >>>     'gres': [0, 1],
    >>>     'expt_dvc_dpath': './PRETEND_EXPT_DVC',
    >>>     'virtualenv_cmd': 'conda activate geowatch',
    >>>     'with_s2_landcover': 1,
    >>>     'with_wv_landcover': 1,
    >>>     'num_wv_landcover_hidden': 0,
    >>>     'num_s2_landcover_hidden': 0,
    >>>     'run': 0,
    >>>     'check': False,
    >>>     'skip_existing': False,
    >>>     'backend': 'serial',
    >>> }
    >>> config['backend'] = 'serial'
    >>> outputs = prep_feats(cmdline=False, **config)
    >>> outputs['queue'].print_commands(0, 0)
    >>> output_paths = outputs['final_output_paths']
    >>> print('output_paths = {}'.format(ub.urepr(output_paths, nl=1)))

Example:
    >>> # Test COLD commands
    >>> from geowatch.cli.queue_cli.prepare_teamfeats import *  # NOQA
    >>> cold_config = ub.codeblock(
        '''
        enabled: 1
        conse: 42
        sensors: 'L8,S2,WV'
        resolution: 3GSD
        ''')
    >>> config = {
    >>>     'src_kwcocos': './PRETEND_BUNDLE/data.kwcoco.json',
    >>>     'gres': [0, 1],
    >>>     'expt_dvc_dpath': './PRETEND_EXPT_DVC',
    >>>     'virtualenv_cmd': 'conda activate geowatch',
    >>>     'cold_config': cold_config,
    >>>     'run': 0,
    >>>     'check': False,
    >>>     'skip_existing': False,
    >>>     'backend': 'serial',
    >>> }
    >>> config['backend'] = 'serial'
    >>> outputs = prep_feats(cmdline=False, **config)
    >>> outputs['queue'].print_commands(0, 0)
    >>> output_paths = outputs['final_output_paths']
    >>> print('output_paths = {}'.format(ub.urepr(output_paths, nl=1)))

Example:
    >>> # Test COLD commands
    >>> from geowatch.cli.queue_cli.prepare_teamfeats import *  # NOQA
    >>> cold_config = None
    >>> config = {
    >>>     'src_kwcocos': './PRETEND_BUNDLE/data.kwcoco.json',
    >>>     'gres': [0, 1],
    >>>     'expt_dvc_dpath': './PRETEND_EXPT_DVC',
    >>>     'virtualenv_cmd': 'conda activate geowatch',
    >>>     'cold_config': cold_config,
    >>>     'with_cold': cold_config,
    >>>     'run': 0,
    >>>     'check': False,
    >>>     'skip_existing': False,
    >>>     'backend': 'serial',
    >>> }
    >>> config['backend'] = 'serial'
    >>> outputs = prep_feats(cmdline=False, **config)
    >>> outputs['queue'].print_commands(0, 0)
    >>> output_paths = outputs['final_output_paths']
    >>> print('output_paths = {}'.format(ub.urepr(output_paths, nl=1)))

Ignore:

    # Drop 6
    export CUDA_VISIBLE_DEVICES="0,1"
    DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
    DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware=auto)
    BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6
    python -m geowatch.cli.queue_cli.prepare_teamfeats \
        --src_kwcocos "$BUNDLE_DPATH"/imganns-*.kwcoco.zip \
        --expt_dvc_dpath="$DVC_EXPT_DPATH" \
        --with_invariants2=0 \
        --with_s2_landcover=0 \
        --with_materials=0 \
        --with_depth=0 \
        --with_cold=1 \
        --skip_existing=1 \
        --gres=0,1 --tmux_workers=4 --backend=tmux --run=0 --print-commands

    # Drop 6
    export CUDA_VISIBLE_DEVICES="0,1"
    DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
    DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware=auto)
    BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6
    python -m geowatch.cli.queue_cli.prepare_teamfeats \
        --src_kwcocos "$BUNDLE_DPATH"/imganns-KR_R00*.kwcoco.zip \
        --expt_dvc_dpath="$DVC_EXPT_DPATH" \
        --with_invariants2=1 \
        --with_s2_landcover=0 \
        --with_materials=0 \
        --with_depth=0 \
        --with_cold=0 \
        --skip_existing=1 \
        --assets_dname=teamfeats \
        --gres=0,1 --tmux_workers=4 --backend=tmux --run=0

    # TimeCombined V2
    export CUDA_VISIBLE_DEVICES="0,1"
    DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
    DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware='auto')
    BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2
    python -m geowatch.cli.queue_cli.prepare_teamfeats \
        --src_kwcocos "$BUNDLE_DPATH"/imganns-*[0-9].kwcoco.zip \
        --expt_dvc_dpath="$DVC_EXPT_DPATH" \
        --with_s2_landcover=1 \
        --with_invariants2=1 \
        --with_sam=1 \
        --with_materials=0 \
        --with_depth=0 \
        --with_cold=0 \
        --skip_existing=1 \
        --assets_dname=teamfeats \
        --gres=0, --tmux_workers=1 --backend=tmux --run=0

    DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
    python -m geowatch.cli.queue_cli.prepare_splits \
        --src_kwcocos=$DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2/combo_imganns*_I2LS*.kwcoco.zip \
        --constructive_mode=True \
        --suffix=I2LS \
        --backend=tmux --tmux_workers=6 \
        --run=1

    DVC_DATA_DPATH=$(geowatch_dvc --tags='phase2_data' --hardware=auto)
    DVC_EXPT_DPATH=$(geowatch_dvc --tags='phase2_expt' --hardware='auto')
    TRUE_SITE_DPATH=$DVC_DATA_DPATH/annotations/drop6_hard_v1/site_models
    OUTPUT_BUNDLE_DPATH=$DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2

    python -m geowatch reproject \
        --src $DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2/data_vali_I2LS_split6.kwcoco.zip \
        --inplace \
        --site_models=$TRUE_SITE_DPATH

    python -m geowatch reproject \
        --src $DVC_DATA_DPATH/Drop6-MeanYear10GSD-V2/data_train_I2LS_split6.kwcoco.zip \
        --inplace \
        --site_models=$TRUE_SITE_DPATH
"""
import scriptconfig as scfg
import ubelt as ub
from cmd_queue.cli_boilerplate import CMDQueueConfig


class TeamFeaturePipelineConfig(CMDQueueConfig):
    """
    This generates the bash commands necessary to run team feature
    computation, followed by aggregation and then splitting out train / val
    datasets.

    Note:
        The models and parameters to use are hard coded in this script.

    TODO:
        - [ ] jsonargparse use-case: specifying parameters of the subalgos
    """
    src_kwcocos = scfg.Value(None, help=ub.paragraph(
        '''
        One or more base coco files to compute team-features on.
        '''), nargs='+', alias=['base_fpath'], group='inputs')

    expt_dvc_dpath = scfg.Value('auto', help=ub.paragraph(
        '''
        The DVC directory where team feature model weights can be found.
        If "auto" uses the ``geowatch.find_dvc_dpath(tags='phase2_expt')``
        mechanism to infer the location.
        '''), group='inputs')

    gres = scfg.Value('auto', help='comma separated list of gpus or auto', group='cmd-queue')

    with_s2_landcover = scfg.Value(False, help='Include DZYNE S2 landcover features', group='team feature enablers')
    with_wv_landcover = scfg.Value(False, help='Include DZYNE WV landcover features', group='team feature enablers')
    with_materials = scfg.Value(False, help='Include Rutgers material features', group='team feature enablers')
    with_mae = scfg.Value(False, help='Include WU MAE features', group='team feature enablers')
    with_invariants2 = scfg.Value(False, help='Include UKY invariant features', group='team feature enablers')
    with_depth = scfg.Value(False, help='Include DZYNE WorldView depth features', group='team feature enablers')
    with_sam = scfg.Value(False, help='Include SAM features')

    cold_config = scfg.Value(None, type=str, help=ub.paragraph(
        '''
        Raw json/yaml or a path to a json/yaml file that specifies the
        config for cold teamfeats.
        '''))

    num_s2_landcover_hidden = 32
    num_wv_landcover_hidden = 32

    invariant_segmentation = scfg.Value(False, help=ub.paragraph(
        '''
        Enable/Disable segmentation part of invariants
        '''), group='invariants options')
    invariant_pca = scfg.Value(0, help='Enable/Disable invariant PCA', group='invariants options')
    invariant_resolution = scfg.Value('10GSD', help='GSD for invariants', group='invariants options')

    virtualenv_cmd = scfg.Value(None, type=str, help=ub.paragraph(
        '''
        Command to start the appropriate virtual environment if your
        bashrc does not start it by default.
        '''))

    skip_existing = scfg.Value(True, help='if True skip completed results', group='common options')
    data_workers = scfg.Value(2, help='dataloader workers for each proc', group='common options')
    kwcoco_ext = scfg.Value('.kwcoco.zip', help=ub.paragraph(
        '''
        use .kwcoco.json or .kwcoco.zip for outputs
        '''), group='common options')
    assets_dname = scfg.Value('_teamfeats', help=ub.paragraph(
        '''
        The name of the top-level directory to write new assets.
        '''), group='common options')
    check = scfg.Value(True, help='if True check files exist where we can', group='common options')

    with_cold = scfg.Value(False, help='Include COLD features. DEPRECATED: pass enabled: 1 in the cold_config')
    # The help text previously said "pass as workermode", which described the
    # wrong key for this option.
    cold_workers = scfg.Value(2, help='workers for pycold. DEPRECATED pass as workers in "cold_config"', group='cold options')
    cold_workermode = scfg.Value('process', help='workers mode for pycold. DEPRECATED pass as workermode in "cold_config"', group='cold options')

    depth_workers = scfg.Value(2, help=ub.paragraph(
        '''
        workers for depth only. On systems with < 32GB RAM might need to
        set to 0
        '''), group='depth options')


def prep_feats(cmdline=True, **kwargs):
    """
    Build (and optionally run) a queue of team-feature prediction jobs.

    The idea is that we should have a lightweight scheduler.  I think something
    fairly minimal can be implemented with tmux, but it would be nice to have a
    more robust slurm extension.

    Args:
        cmdline (bool): forwarded to ``TeamFeaturePipelineConfig.cli``.
        **kwargs: forwarded as ``data`` to ``TeamFeaturePipelineConfig.cli``.

    Returns:
        dict: with keys ``'queue'`` (the queue the jobs were submitted to) and
            ``'final_output_paths'`` (one combined-feature kwcoco path per
            non-blocked input file).

    Raises:
        FileNotFoundError: if ``check`` is truthy and an input file is missing.

    TODO:
        - [ ] Option to just dump the serial bash script that does everything.
    """
    config = TeamFeaturePipelineConfig.cli(cmdline=cmdline, data=kwargs, strict=True)
    import rich
    rich.print('config = {}'.format(ub.urepr(config, nl=2)))

    from scriptconfig.smartcast import smartcast
    from kwutil import util_path

    # hack for cmd-queue, will be fixed soon
    config.slurm_options = config.slurm_options or {}

    # Normalize gres into a list of gpu indexes.
    gres = smartcast(config['gres'])
    if gres is None:
        gres = 'auto'
    if gres == 'auto':
        import torch
        gres = list(range(torch.cuda.device_count()))
    elif not ub.iterable(gres):
        gres = [gres]

    if config['expt_dvc_dpath'] == 'auto':
        import geowatch
        expt_dvc_dpath = geowatch.find_dvc_dpath(tags='phase2_expt', hardware='auto')
    else:
        expt_dvc_dpath = ub.Path(config['expt_dvc_dpath'])

    # Inputs whose names contain one of these substrings are skipped; these
    # substrings match suffixes this script appends to its own outputs.
    blocklist = [
        '_dzyne_landcover',
        '_dzyne_s2_landcover',
        '_dzyne_wv_landcover',
        '_uky_invariants',
        '_rutgers_material_seg_v4',
    ]

    base_fpath_pat = config['src_kwcocos']
    base_fpath_list = list(util_path.coerce_patterned_paths(
        base_fpath_pat, globfallback=True))

    from geowatch.mlops.pipeline_nodes import Pipeline

    dag_nodes = []
    final_output_paths = []

    for src_fpath in base_fpath_list:
        # Hack to prevent doubling up.
        # Should really just choose a better naming scheme so we don't have
        # to break user expectations about glob
        if any(b in src_fpath.name for b in blocklist):
            print(f'blocked src_fpath={src_fpath}')
            continue

        if config.check:
            if not src_fpath.exists():
                # This must be an f-string so the offending path is shown.
                raise FileNotFoundError(
                    f'Specified kwcoco file: {src_fpath!r} does not exist and check=True')

        aligned_bundle_dpath = src_fpath.parent

        nodes, base_combo_fpath = _make_teamfeat_nodes(
            src_fpath, expt_dvc_dpath, aligned_bundle_dpath, config)
        final_output_paths.append(base_combo_fpath)
        dag_nodes.extend(nodes)

    dag = Pipeline(dag_nodes)
    dag.configure(cache=True)

    queue = config.create_queue(gres=gres)
    dag.submit_jobs(
        queue=queue,
        skip_existing=config['skip_existing'],
        enable_links=False,
        write_invocations=False,
        write_configs=False,
    )
    # pipeline._populate_explicit_dependency_queue(queue)

    config.run_queue(queue)

    outputs = {
        'queue': queue,
        'final_output_paths': final_output_paths,
    }
    return outputs


def _make_teamfeat_nodes(src_fpath, expt_dvc_dpath, aligned_bundle_dpath, config):
    """
    Build the pipeline nodes that compute team features for one kwcoco file.

    One ``ProcessNode`` is created per enabled feature, plus a final node that
    runs ``geowatch.cli.coco_combine_features`` over the source file and every
    feature output.

    Args:
        src_fpath: path to the input kwcoco file (``.name`` and ``.parent``
            are used, so a path object is expected).
        expt_dvc_dpath: directory that model weight paths are joined onto.
        aligned_bundle_dpath: directory where output kwcoco files are placed.
        config (TeamFeaturePipelineConfig): the resolved configuration. Note
            that ``config['with_cold']`` may be overwritten from
            ``cold_config``.

    Returns:
        Tuple[list, path]: ``(nodes, base_combo_fpath)`` where ``nodes`` is the
            combine node followed by the feature nodes and ``base_combo_fpath``
            is the output path of the combine node.
    """
    from geowatch.mlops.pipeline_nodes import ProcessNode
    from kwutil import util_parallel
    from geowatch.utils import simple_dvc

    data_workers = util_parallel.coerce_num_workers(config['data_workers'])

    model_fpaths = {
        # 'rutgers_materials': expt_dvc_dpath / 'models/rutgers/rutgers_peri_materials_v3/experiments_epoch_18_loss_59.014100193977356_valmF1_0.18694573888313187_valChangeF1_0.0_time_2022-02-01-01:53:20.pth',
        # 'rutgers_materials': dvc_dpath / 'models/rutgers/experiments_epoch_62_loss_0.09470022770735186_valmIoU_0.5901660531463717_time_2021101T16277.pth',
        'rutgers_materials_model_v4': expt_dvc_dpath / 'models/rutgers/ru_model_05_25_2023.ckpt',
        'rutgers_materials_config_v4': expt_dvc_dpath / 'models/rutgers/ru_config_05_25_2023.yaml',

        'wu_mae_v1': expt_dvc_dpath / 'models/wu/wu_mae_2023_04_21/Drop6-epoch=01-val_loss=0.20.ckpt',

        # 'dzyne_s2_landcover': expt_dvc_dpath / 'models/landcover/visnav_remap_s2_subset.pt',
        'dzyne_s2_landcover': expt_dvc_dpath / 'models/landcover/sentinel2.pt',
        'dzyne_wv_landcover': expt_dvc_dpath / 'models/landcover/worldview.pt',

        # 2022-02-11
        # 'uky_pretext': dvc_dpath / 'models/uky/uky_invariants_2022_02_11/TA1_pretext_model/pretext_package.pt',
        # 'uky_pca': dvc_dpath / 'models/uky/uky_invariants_2022_02_11/TA1_pretext_model/pca_projection_matrix.pt',
        # 'uky_segmentation': dvc_dpath / 'models/uky/uky_invariants_2022_02_11/TA1_segmentation_model/segmentation_package.pt',

        # 2022-03-11
        # 'uky_pretext': dvc_dpath / 'models/uky/uky_invariants_2022_03_11/TA1_pretext_model/pretext_package.pt',
        # 'uky_pca': dvc_dpath / 'models/uky/uky_invariants_2022_03_11/TA1_pretext_model/pca_projection_matrix.pt',
        # 'uky_segmentation': dvc_dpath / 'models/uky/uky_invariants_2022_02_11/TA1_segmentation_model/segmentation_package.pt',  # uses old segmentation model

        # 2022-03-21
        'uky_pretext': expt_dvc_dpath / 'models/uky/uky_invariants_2022_03_21/pretext_model/pretext_package.pt',
        'uky_pca': expt_dvc_dpath / 'models/uky/uky_invariants_2022_03_21/pretext_model/pretext_pca_104.pt',

        'uky_pretext2': expt_dvc_dpath / 'models/uky/uky_invariants_2022_12_17/TA1_pretext_model/pretext_package.pt',
        # 'uky_segmentation': dvc_dpath / 'models/uky/uky_invariants_2022_02_21/TA1_segmentation_model/segmentation_package.pt',  # uses old segmentation model

        # TODO: use v1 on RGB and v2 on PAN
        'dzyne_depth': expt_dvc_dpath / 'models/depth/weights_v1.pt',
        # 'dzyne_depth': dvc_dpath / 'models/depth/weights_v2_gray.pt',

        'sam': expt_dvc_dpath / 'models/sam/sam_vit_h_4b8939.pth'
    }

    subset_name = src_fpath.name.split('.')[0]

    if subset_name.endswith('-rawbands'):
        subset_name = subset_name.rsplit('-', 1)[0]

    # Suffix node names with a short hash of the source path so that nodes
    # built for different inputs get different names.
    name_suffix = '_' + ub.hash_data(src_fpath)[0:8]

    outputs = {
        # 'rutgers_materials': aligned_bundle_dpath / (subset_name + '_rutgers_material_seg_v3' + config['kwcoco_ext']),
        'rutgers_materials_v4': aligned_bundle_dpath / (subset_name + '_rutgers_material_seg_v4' + config['kwcoco_ext']),
        'wu_mae': aligned_bundle_dpath / (subset_name + '_wu_mae' + config['kwcoco_ext']),
        'dzyne_s2_landcover': aligned_bundle_dpath / (subset_name + '_dzyne_s2_landcover' + config['kwcoco_ext']),
        'dzyne_wv_landcover': aligned_bundle_dpath / (subset_name + '_dzyne_wv_landcover' + config['kwcoco_ext']),
        'dzyne_depth': aligned_bundle_dpath / (subset_name + '_dzyne_depth' + config['kwcoco_ext']),
        'uky_invariants': aligned_bundle_dpath / (subset_name + '_uky_invariants' + config['kwcoco_ext']),
        'cold': aligned_bundle_dpath / (subset_name + '_cold' + config['kwcoco_ext']),
        'sam': aligned_bundle_dpath / (subset_name + '_sam' + config['kwcoco_ext']),
    }

    # print('Exist check: ')
    # print('model_packages: ' + ub.urepr(ub.map_vals(lambda x: x.exists(), model_fpaths)))
    # print('feature outputs: ' + ub.urepr(ub.map_vals(lambda x: x.exists(), outputs)))

    # TODO: different versions of features need different codes.
    codes = {
        'with_s2_landcover': 'LS2',
        'with_wv_landcover': 'LWV',
        'with_depth': 'D',
        'with_materials': 'M',
        'with_mae': 'E',
        'with_invariants2': 'I2',
        'with_cold': 'C',
        'with_sam': 'S',
    }

    # tmux queue is still limited. The order of submission matters.
    feature_nodes = []
    combo_code_parts = []

    key = 'with_s2_landcover'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['dzyne_s2_landcover'])
        # Landcover is fairly fast to run
        node = ProcessNode(
            name=key + name_suffix,
            executable='python -m geowatch.tasks.landcover.predict',
            in_paths={
                'dataset': src_fpath,
                'deployed': model_fpaths['dzyne_s2_landcover'],
            },
            out_paths={
                'output': outputs['dzyne_s2_landcover']
            },
            algo_params={
                'with_hidden': config.num_s2_landcover_hidden,
                'select_images': '.sensor_coarse == "S2"',
                'assets_dname': config.assets_dname,
            },
            perf_params={
                'device': 0,
                'num_workers': data_workers,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    key = 'with_wv_landcover'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['dzyne_wv_landcover'])
        # Landcover is fairly fast to run
        node = ProcessNode(
            name=key + name_suffix,
            executable='python -m geowatch.tasks.landcover.predict',
            in_paths={
                'dataset': src_fpath,
                'deployed': model_fpaths['dzyne_wv_landcover'],
            },
            out_paths={
                'output': outputs['dzyne_wv_landcover']
            },
            algo_params={
                'with_hidden': config.num_wv_landcover_hidden,
                'select_images': '.sensor_coarse == "WV"',
                'assets_dname': config.assets_dname,
            },
            perf_params={
                'device': 0,
                'num_workers': data_workers,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    from kwutil.util_yaml import Yaml
    key = 'with_cold'
    cold_config = Yaml.coerce(config.cold_config) or {}
    GRACEFUL_DEPRECATION_WORKAROUNDS = 1
    if GRACEFUL_DEPRECATION_WORKAROUNDS:
        # Fall back to the deprecated top-level options when the cold_config
        # does not specify them itself.
        if 'workers' not in cold_config:
            cold_config['workers'] = config.cold_workers
        if 'workermode' not in cold_config:
            # This branch used to re-assign 'workers', so the deprecated
            # cold_workermode option was silently ignored.
            cold_config['workermode'] = config.cold_workermode
        # An explicit "enabled" in cold_config wins over with_cold.
        config[key] = cold_config.get('enabled', config[key])
        cold_config['enabled'] = config[key]

    if cold_config['enabled']:
        node = ProcessNode(
            name=key + name_suffix,
            executable='python -m geowatch.tasks.cold.predict',
            in_paths={
                'coco_fpath': src_fpath,
            },
            out_paths={
                'mod_coco_fpath': outputs['cold'],
                'out_dpath': src_fpath.parent,
            },
            algo_params={
                'sensors': 'L8',
                'adj_cloud': False,
                'method': 'COLD',
                'prob': 0.99,
                'conse': 6,
                'cm_interval': 60,
                'year_lowbound': None,
                'year_highbound': None,
                'coefs': 'cv,rmse,a0,a1,b1,c1',
                'coefs_bands': '0,1,2,3,4,5',
                'timestamp': False,
                'combine': False,
                'resolution': '30GSD',
            },
            perf_params={
                'workermode': 'process',
                'workers': 2,
            },
            node_dpath='.',
        )
        WITH_S2 = 1  # hard coded
        if WITH_S2:
            node.algo_params.update({
                'sensors': 'L8,S2',
                'conse': 8,
                'resolution': '10GSD',
            })
        # User-specified cold_config values are applied last.
        node.configure(cold_config)
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    key = 'with_depth'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['dzyne_depth'])
        # Only need 1 worker to minimize lag between images, task is GPU bound
        depth_data_workers = config['depth_workers']
        if depth_data_workers == 'auto':
            # Choose the number of workers based on available system memory.
            import psutil
            import pint
            reg = pint.UnitRegistry()
            vmem_info = psutil.virtual_memory()
            total_gb = (vmem_info.total * reg.byte).to(reg.gigabyte).m
            avail_gb = (vmem_info.available * reg.byte).to(reg.gigabyte).m
            if avail_gb < 32:
                depth_data_workers = 0
            elif avail_gb < 64:
                depth_data_workers = 1
            else:
                depth_data_workers = 2
            print('total_gb = {!r}'.format(total_gb))
            print('avail_gb = {!r}'.format(avail_gb))

        depth_window_size = 1440
        node = ProcessNode(
            name=key + name_suffix,
            executable='python -m geowatch.tasks.depth.predict',
            in_paths={
                'dataset': src_fpath,
                'deployed': model_fpaths['dzyne_depth'],
            },
            out_paths={
                'output': outputs['dzyne_depth'],
            },
            algo_params={
                'window_size': depth_window_size,
            },
            perf_params={
                # 'skip_existing': 1,
                'data_workers': depth_data_workers,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    key = 'with_materials'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['rutgers_materials_model_v4'])
        node = ProcessNode(
            name=key + name_suffix,
            executable='python -m geowatch.tasks.rutgers_material_seg_v2.predict',
            in_paths={
                'kwcoco_fpath': src_fpath,
                'model_fpath': model_fpaths['rutgers_materials_model_v4'],
                'config_fpath': model_fpaths['rutgers_materials_config_v4'],
            },
            out_paths={
                'output_kwcoco_fpath': outputs['rutgers_materials_v4'],
            },
            algo_params={
            },
            perf_params={
                'workers': data_workers,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    key = 'with_mae'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['wu_mae_v1'])
        node = ProcessNode(
            name=key + name_suffix,
            executable=ub.codeblock(
                '''
                python -m geowatch.tasks.mae.predict
                '''),
            in_paths={
                'input_kwcoco': src_fpath,
                'mae_ckpt_path': model_fpaths['wu_mae_v1'],
            },
            out_paths={
                'output_kwcoco': outputs['wu_mae'],
            },
            algo_params={
                'assets_dname': config.assets_dname,
            },
            perf_params={
                'workers': data_workers,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    key = 'with_invariants2'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['uky_pretext2'])

        if not model_fpaths['uky_pretext2'].exists():
            print('Warning: UKY pretext model does not exist')

        # task['gpus'] = 1
        # all_tasks = 'before_after segmentation pretext'
        node = ProcessNode(
            name=key + name_suffix,
            executable=ub.codeblock(
                '''
                python -m geowatch.tasks.invariants.predict
                '''),
            in_paths={
                'input_kwcoco': src_fpath,
                'pretext_package_path': model_fpaths['uky_pretext2'],
                'pca_projection_path': model_fpaths['uky_pca'],
            },
            out_paths={
                'output_kwcoco': outputs['uky_invariants'],
            },
            algo_params={
                'assets_dname': config.assets_dname,
                'input_resolution': config['invariant_resolution'],
                'window_resolution': config['invariant_resolution'],
                'patch_size': 256,
                'patch_overlap': 0.3,
                'do_pca': config['invariant_pca'],
                'tasks': ['before_after', 'pretext'],
            },
            perf_params={
                'workers': data_workers,
                'io_workers': 0,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    key = 'with_sam'
    if config[key]:
        if config.check:
            simple_dvc.SimpleDVC().request(model_fpaths['sam'])

        if not model_fpaths['sam'].exists():
            print('Warning: SAM model does not exist')
        node = ProcessNode(
            name=key + name_suffix,
            executable=ub.codeblock(
                '''
                python -m geowatch.tasks.sam.predict
                '''),
            in_paths={
                'input_kwcoco': src_fpath,
                'weights_fpath': model_fpaths['sam'],
            },
            out_paths={
                'output_kwcoco': outputs['sam'],
            },
            algo_params={
                'assets_dname': config.assets_dname,
                'window_overlap': 0.3,
            },
            perf_params={
                'data_workers': data_workers,
                'io_workers': 0,
            },
            node_dpath='.',
        )
        feature_nodes.append(node)
        combo_code_parts.append(codes[key])

    # Determine what all of the output paths will be
    feature_paths = []
    feature_output_nodes = []
    for node in feature_nodes:
        node_features = []
        for output in node.outputs.values():
            if output.name == 'out_dpath':
                # hack to skip a non-feature output for COLD
                continue
            node_features.append(str(output.final_value))
            feature_output_nodes.append(output)
        assert len(node_features) == 1, (
            'code assumes each node should have 1 feature output')
        feature_paths.extend(node_features)

    # Finalize features by combining them all into combo.kwcoco.json
    tocombine = [str(src_fpath)] + feature_paths
    # Sort the codes so the combo name does not depend on submission order.
    combo_code = ''.join(sorted(combo_code_parts))

    base_combo_fpath = aligned_bundle_dpath / (f'combo_{subset_name}_{combo_code}' + config['kwcoco_ext'])

    for node in feature_nodes:
        node.configure(node.config, cache=False)

    combine_node = ProcessNode(
        name='combine_features' + name_suffix,
        executable='python -m geowatch.cli.coco_combine_features',
        in_paths={
            'src': tocombine,
        },
        out_paths={
            'dst': base_combo_fpath,
        },
    )
    # TODO: it would be nice if the mlops DAG allowed us to simply specify the
    # process level dependencies and assume we take care of the i/o level
    # dependencies.
    for output in feature_output_nodes:
        output.connect(combine_node.inputs['src'])

    combine_node.configure(combine_node.config, cache=False)
    nodes = [combine_node] + feature_nodes
    return nodes, base_combo_fpath


main = prep_feats


if __name__ == '__main__':
    """
    CommandLine:
        DVC_DPATH=$(geowatch_dvc)
        python -m geowatch.cli.queue_cli.prepare_teamfeats \
            --src_kwcocos="$DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json" \
            --gres=0 \
            --with_depth=0 \
            --run=False --skip_existing=False --virtualenv_cmd "conda activate geowatch" \
            --backend=serial

        python -m geowatch.cli.queue_cli.prepare_teamfeats --gres=0,2 --with_depth=True --keep_sessions=True
        python -m geowatch.cli.queue_cli.prepare_teamfeats --gres=2 --with_materials=False --keep_sessions=True

        # TODO: rename to schedule teamfeatures

        # TO UPDATE ANNOTS
        # Update to whatever the state of the annotations submodule is
        DVC_DPATH=$(geowatch_dvc)
        python -m geowatch reproject_annotations \
            --src $DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json \
            --dst $DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json \
            --site_models="$DVC_DPATH/annotations/site_models/*.geojson"

        kwcoco stats $DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data_20220203.kwcoco.json $DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json

        # Team Features on Drop2
        DVC_DPATH=$(geowatch_dvc)
        python -m geowatch.cli.queue_cli.prepare_teamfeats \
            --src_kwcocos=$DVC_DPATH/Drop2-Aligned-TA1-2022-02-15/data.kwcoco.json \
            --gres=0,1 --with_depth=0 --with_materials=False \
            --run=0

        ###
        DATASET_CODE=Aligned-Drop2-TA1-2022-02-24
        DVC_DPATH=$(geowatch_dvc)
        DATASET_CODE=Drop2-Aligned-TA1-2022-02-15
        KWCOCO_BUNDLE_DPATH=$DVC_DPATH/$DATASET_CODE
        python -m geowatch.cli.queue_cli.prepare_teamfeats \
            --src_kwcocos=$KWCOCO_BUNDLE_DPATH/data.kwcoco.json \
            --gres=0,1 \
            --with_depth=1 \
            --with_s2_landcover=1 \
            --with_materials=1 \
            --depth_workers=auto \
            --skip_existing=0 --run=0

        # Simple demo
        python -m geowatch.cli.queue_cli.prepare_teamfeats \
            --src_kwcocos=./mydata/data.kwcoco.json \
            --gres=0,1 \
            --with_depth=0 \
            --with_s2_landcover=1 \
            --with_materials=1 \
            --skip_existing=0 \
            --backend=tmux \
            --run=0
    """
    main(cmdline=True)