#!/usr/bin/env python3
"""
Group site summaries by "positive" / "negative" status of change, export
each group as its own region model, and then crop a kwcoco dataset to those
regions. Individual paths may require munging.
"""
from watch.utils import util_gis
import cmd_queue
import scriptconfig as scfg
import ubelt as ub
import watch
import json


class CroppedValidateDataset(scfg.DataConfig):
    src_dvc_dpath = 'auto'
    dst_dvc_dpath = 'auto'

    def normalize(self):
        # Resolve 'auto' defaults to machine-specific DVC checkouts.
        if self.src_dvc_dpath == 'auto':
            self.src_dvc_dpath = watch.find_dvc_dpath(tags='phase2_data', hardware='ssd')
        if self.dst_dvc_dpath == 'auto':
            self.dst_dvc_dpath = watch.find_dvc_dpath(tags='phase2_data', hardware='hdd')


def group_site_models(config):
    # Coerce to a path in case the user passed an explicit string.
    data_dvc_dpath = ub.Path(config.src_dvc_dpath)
    region_dpath = data_dvc_dpath / 'annotations/drop6/region_models'
    region_datas = list(util_gis.coerce_geojson_datas(
        region_dpath, format='json', workers=16))
    output_dpath = (data_dvc_dpath / 'annotations' / '_temp' / 'region_partition')
    grouped_region_fpaths = ub.ddict(list)
    # Group site summaries within regions and write them to new files.
    for info in region_datas:
        region = info['data']
        grouped_feats = ub.ddict(list)
        region_row = None
        for feat in region['features']:
            props = feat['properties']
            if props['type'] == 'region':
                # Each region model should contain exactly one region row.
                assert region_row is None
                region_row = feat
            elif props['type'] in {'site_summary', ' site_summary'}:
                # Normalize the status, and fix a known annotation issue
                # where the type contains a stray leading space.
                status = props['status'] = props['status'].strip().lower()
                props['type'] = props['type'].strip()
                grouped_feats[status].append(feat)
            else:
                raise KeyError(f'Unexpected feature type: {props["type"]!r}')
        assert region_row is not None, 'region file is missing its region row'
        for status, feats in grouped_feats.items():
            part_name = region_row['properties']['region_id'] + '_' + status
            # Shallow-copy the region and replace its features with the
            # region row followed by only the site summaries of this status.
            region_part = region.copy()
            region_part.pop('features')
            region_part['name'] = part_name
            region_part['features'] = [region_row] + list(feats)
            dpath = (output_dpath / status).ensuredir()
            fpath = dpath / (part_name + '.geojson')
            fpath.write_text(json.dumps(region_part))
            grouped_region_fpaths[status].append(fpath)
    return grouped_region_fpaths


def submit_crop_jobs(config, grouped_region_fpaths):
    # The src / dst dvc paths may point at different checkouts (e.g. ssd
    # for reading, hdd for writing); both come from the resolved config.
    src_dvc_dpath = ub.Path(config.src_dvc_dpath)
    dst_dvc_dpath = ub.Path(config.dst_dvc_dpath)
    output_dpath = (dst_dvc_dpath / 'Validation-V1').ensuredir()
    queue = cmd_queue.Queue.create(
        backend='tmux', size=8, dpath=output_dpath / '_queue')

    # TODO: could choose a region-specific file more intelligently
    src_bundle_dpath = src_dvc_dpath / 'Drop6'
    src_fpath = src_bundle_dpath / 'data.kwcoco.zip'

    for status, fpaths in grouped_region_fpaths.items():
        status_dpath = (output_dpath / status).ensuredir()
        align_workers = 2
        align_aux_workers = 2
        for region_fpath in fpaths:
            # Command to crop to each site summary in the region.
            dst_dpath = (status_dpath / region_fpath.stem).ensuredir()
            command = ub.codeblock(
                rf'''
                python -m geowatch.cli.coco_align \
                    --src "{src_fpath}" \
                    --dst "{dst_dpath}" \
                    --regions "{region_fpath}" \
                    --minimum_size="128x128@10GSD" \
                    --context_factor=1 \
                    --geo_preprop=auto \
                    --force_nodata=-9999 \
                    --site_summary=True \
                    --target_gsd=5 \
                    --aux_workers={align_aux_workers} \
                    --workers={align_workers} \
                ''')
            queue.submit(command)
    # Call queue.print_commands() here to inspect the jobs before running.
    queue.run()


def main():
    config = CroppedValidateDataset.cli()
    print('config = ' + ub.urepr(dict(config), nl=1))
    grouped_region_fpaths = group_site_models(config)
    submit_crop_jobs(config, grouped_region_fpaths)


if __name__ == '__main__':
    """
    CommandLine:
        python ~/code/watch/dev/oneoffs/make_site_validation_initial_dataset.py
    """
    main()
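

# --- Hedged sketch (not part of the original workflow) ---
# A minimal sanity check for the partitioned region files written by
# group_site_models. It assumes the layout produced above:
# .../region_partition/<status>/<region_id>_<status>.geojson, with the
# region row as the first feature. The helper name is illustrative.


def _sanity_check_partition(output_dpath):
    """
    Verify each partitioned file keeps its region row first and that all
    site summaries agree with the status encoded in the directory name.
    """
    for fpath in ub.Path(output_dpath).glob('*/*.geojson'):
        status = fpath.parent.name
        data = json.loads(fpath.read_text())
        feats = data['features']
        assert feats[0]['properties']['type'] == 'region'
        assert all(f['properties']['status'] == status for f in feats[1:])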
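

# Usage sketch (assumptions: a tmux-capable machine and DVC checkouts
# that resolve via watch.find_dvc_dpath):
#
#   python make_site_validation_initial_dataset.py \
#       --src_dvc_dpath=auto --dst_dvc_dpath=auto
#
# For debugging without tmux, cmd_queue also offers a 'serial' backend,
# e.g. cmd_queue.Queue.create(backend='serial', dpath=...), which runs
# the submitted crop commands one at a time in the current process.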