""" This is step 1 / 4 in predict.py SeeAlso: predict.py prepare_kwcoco.py * tile_processing_kwcoco.py export_cold_result_kwcoco.py assemble_cold_result_kwcoco.py This is a proof-of-concept for converting kwcoco files into the expected data structures for pycold. Relevant functions: * grab_demo_kwcoco_dataset - downloads a small kwcoco dataset for testing * stack_kwcoco - runs the stacking process on an entire kwcoco file * process_one_coco_image - runs the stacking for a single coco image. Limitations: * Currently only handles Landsat-8 * The quality bands are not exactly what I was expecting them to be, some of the quality filtering is stubbed out or disabled. * Not setup for an HPC environment yet, but that extension shouldn't be too hard. * Nodata values are currently not masked or handled * Configurations are hard-coded TODO: - [ ] Incorporate geowatch/tasks/fusion/datamodules/qa_bands.py """ import kwcoco import json import numpy as np import einops import functools import operator import ubelt as ub import itertools as it import logging from datetime import datetime as datetime_cls import numpy as geek import scriptconfig as scfg logger = logging.getLogger(__name__) try: from line_profiler import profile except ImportError: from ubelt import identity as profile class PrepareKwcocoConfig(scfg.DataConfig): """ The docstring will be the description in the CLI help """ coco_fpath = scfg.Value(None, help=ub.paragraph( ''' a path to a file to input kwcoco file ''')) out_dpath = scfg.Value(None, help='output directory for the output') sensors = scfg.Value('L8', type=str, help='sensor type, default is "L8"') adj_cloud = scfg.Value(False, help='How to treat QA band, default is False: ignoring adj. cloud class') method = scfg.Value(None, help=ub.paragraph( ''' stacking mode for original COLD or Hybrid, default is None, if HybridCOLD then stacked data include g, r, nir, swir16, swir22, ASI, tir, QA ''')) resolution = scfg.Value('30GSD', help='if specified then L8 and S2 data is processed at this resolution') resolution_PD = scfg.Value('3GSD', help='if specified then PD data is processed at this resolution') workers = scfg.Value(0, help='number of parallel workers') # TODO: # For each sensor, register the specific bands we are interested in. # This demo currently assumes landsat8 SENSOR_TO_INFO = {} SENSOR_TO_INFO['L8'] = { 'sensor_name': 'Landsat-8', 'intensity_channels': 'blue|green|red|nir|swir16|swir22|lwir11', 'quality_channels': 'quality', 'quality_interpretation': 'FMASK' } # The name of quality_channels for Drop 4 is 'cloudmask'. SENSOR_TO_INFO['S2'] = { 'sensor_name': 'Sentinel-2', 'intensity_channels': 'blue|green|red|nir|swir16|swir22|lwir11', 'quality_channels': 'quality', 'quality_interpretation': 'FMASK' } SENSOR_TO_INFO['PD'] = { 'sensor_name': 'Planetscope', 'intensity_channels': 'blue|green|red|nir', 'quality_channels': 'quality', 'quality_interpretation': 'FMASK' } # Register different quality bit standards. QA_INTERPRETATIONS = {} # These are specs for TA1 processed data QA_BIT = { 'clear': 1 << 0, 'cloud': 1 << 1, 'cloud_adj': 1 << 2, 'shadow': 1 << 3, 'snow': 1 << 4, 'water': 1 << 5, } QA_INTERPRETATIONS['FMASK'] = { 'clear': 0, 'water': 1, 'cloud_shadow': 2, 'snow': 3, 'cloud': 4, 'no_obs': 255, } QUALITY_BIT_INTERPRETATIONS = {} @profile def prepare_kwcoco_main(cmdline=1, **kwargs): """_summary_ Args: cmdline (int, optional): _description_. Defaults to 1. Ignore: python -m geowatch.tasks.cold.prepare_kwcoco --help TEST_COLD=1 xdoctest -m geowatch.tasks.cold.prepare_kwcoco prepare_kwcoco_main Example: >>> # xdoctest: +REQUIRES(env:TEST_COLD) >>> from geowatch.tasks.cold.prepare_kwcoco import prepare_kwcoco_main >>> from geowatch.tasks.cold.prepare_kwcoco import * >>> kwargs= dict( >>> coco_fpath = ub.Path('/home/jws18003/data/dvc-repos/smart_data_dvc/Drop6/data_vali_split1_KR_R001.kwcoco.json'), >>> out_dpath = ub.Path.appdir('/gpfs/scratchfs1/zhz18039/jws18003/kwcoco'), >>> sensors = 'L8,S2', >>> adj_cloud = False, >>> method = None, >>> ) >>> cmdline=0 >>> prepare_kwcoco_main(cmdline, **kwargs) """ pman = kwargs.pop('pman', None) config = PrepareKwcocoConfig.cli(cmdline=cmdline, data=kwargs) coco_fpath = config['coco_fpath'] dpath = ub.Path(config['out_dpath']).ensuredir() sensors = config['sensors'] adj_cloud = config['adj_cloud'] method = config['method'] out_dir = dpath / 'stacked' workers = config['workers'] # TODO: add resolution for PD data? resolution = config.resolution resolution_PD = config.resolution_PD sensors = list(sensors.split(",")) if 'PD' in sensors: sensors.remove('PD') meta_fpath = stack_kwcoco_PD(coco_fpath, out_dir, 'PD', adj_cloud, method, pman, workers, resolution_PD) if sensors: meta_fpath = stack_kwcoco(coco_fpath, out_dir, sensors, adj_cloud, method, pman, workers, resolution) return meta_fpath # function for decoding HLS qa band def qa_decoding(qa_array): """ This function is modified from qabitval_array_HLS function (https://github.com/GERSL/pycold/blob/c5b380eccc2916e5c3aec0bbd2b1982e114b75b1/src/python/pycold/imagetool/prepare_ard.py#L74) """ unpacked = np.full(qa_array.shape, QA_INTERPRETATIONS['FMASK']['clear']) QA_CLOUD_unpacked = geek.bitwise_and(qa_array, QA_BIT['cloud']) QA_CLOUD_ADJ = geek.bitwise_and(qa_array, QA_BIT['cloud_adj']) QA_SHADOW_unpacked = geek.bitwise_and(qa_array, QA_BIT['shadow']) QA_SNOW_unpacked = geek.bitwise_and(qa_array, QA_BIT['snow']) QA_WATER_unpacked = geek.bitwise_and(qa_array, QA_BIT['water']) unpacked[QA_WATER_unpacked > 0] = QA_INTERPRETATIONS['FMASK']['water'] unpacked[QA_SNOW_unpacked > 0] = QA_INTERPRETATIONS['FMASK']['snow'] unpacked[QA_SHADOW_unpacked > 0] = QA_INTERPRETATIONS['FMASK']['cloud_shadow'] unpacked[QA_CLOUD_ADJ > 0] = QA_INTERPRETATIONS['FMASK']['cloud'] unpacked[QA_CLOUD_unpacked > 0] = QA_INTERPRETATIONS['FMASK']['cloud'] unpacked[qa_array == QA_INTERPRETATIONS['FMASK'] ['no_obs']] = QA_INTERPRETATIONS['FMASK']['no_obs'] return unpacked def qa_decoding_no_boundary(qa_array): """ This function is modified from qabitval_array_HLS function (https://github.com/GERSL/pycold/blob/c5b380eccc2916e5c3aec0bbd2b1982e114b75b1/src/python/pycold/imagetool/prepare_ard.py#L74) """ unpacked = np.full(qa_array.shape, QA_INTERPRETATIONS['FMASK']['clear']) try: QA_CLOUD_unpacked = geek.bitwise_and(qa_array, QA_BIT['cloud']) QA_CLOUD_ADJ = geek.bitwise_and(qa_array, QA_BIT['cloud_adj']) QA_SHADOW_unpacked = geek.bitwise_and(qa_array, QA_BIT['shadow']) QA_SNOW_unpacked = geek.bitwise_and(qa_array, QA_BIT['snow']) QA_WATER_unpacked = geek.bitwise_and(qa_array, QA_BIT['water']) unpacked[QA_WATER_unpacked > 0] = QA_INTERPRETATIONS['FMASK']['water'] unpacked[QA_SNOW_unpacked > 0] = QA_INTERPRETATIONS['FMASK']['snow'] unpacked[QA_SHADOW_unpacked > 0] = QA_INTERPRETATIONS['FMASK']['cloud_shadow'] unpacked[QA_CLOUD_unpacked > 0] = QA_INTERPRETATIONS['FMASK']['cloud'] unpacked[QA_CLOUD_ADJ > 0] = QA_INTERPRETATIONS['FMASK']['clear'] unpacked[qa_array == QA_INTERPRETATIONS['FMASK'] ['no_obs']] = QA_INTERPRETATIONS['FMASK']['no_obs'] except TypeError as ex: import warnings warnings.warn(f'WARNING: COLD PREPARE KWCOCO Failed to qa decode ex={ex}') return unpacked def setup_logging(): # TODO: handle HPC things here in addition to stdout for doctests logging.basicConfig(level='INFO') ########################################################################## # Functions for Artificial Surface Index (ASI) # # See original code: https://github.com/GERSL/ASI_py/blob/main/ASI_standalone.py # ########################################################################## def hist_cut(band, mask, fill_value=-9999, k=3, minmax='std'): if minmax == 'std': mean = band[mask].mean() std = band[mask].std() low_val = (mean - k * std) high_val = (mean + k * std) else: low_val, high_val = minmax # use specified value range. is_low = band < low_val is_high = band > high_val mask_invalid_index = is_low | is_high band[mask_invalid_index] = fill_value return band, ~mask_invalid_index def minmax_norm(band, mask, fill_value=-9999): max_val = band[mask].max() min_val = band[mask].min() extent = max_val - min_val if extent != 0: shifted = band - min_val scaled = shifted / extent band[mask] = scaled[mask] band[~mask] = fill_value return band # Artificial Surface Index (ASI) is designed based the surface reflectance # imagery of Landsat 8. def artificial_surface_index( Blue, Green, Red, NIR, SWIR1, SWIR2, Scale, MaskValid_Obs, fillV): # The calculation chain. # Artificial surface Factor (AF). AF = (NIR - Blue) / (NIR + Blue) + 0.000001 AF, MaskValid_AF = hist_cut(AF, MaskValid_Obs, fillV, 6, [-1, 1]) MaskValid_AF_U = MaskValid_AF & MaskValid_Obs AF_Norm = minmax_norm(AF, MaskValid_AF_U, fillV) # Vegetation Suppressing Factor (VSF). # Modified Soil Adjusted Vegetation Index (MSAVI). MSAVI = ((2 * NIR + 1 * Scale) - np.sqrt((2 * NIR + 1 * Scale)**2 - 8 * (NIR - Red))) / 2 MSAVI, MaskValid_MSAVI = hist_cut(MSAVI, MaskValid_Obs, fillV, 6, [-1, 1]) NDVI = (NIR - Red) / (NIR + Red) + 0.000001 NDVI, MaskValid_NDVI = hist_cut(NDVI, MaskValid_Obs, fillV, 6, [-1, 1]) VSF = 1 - MSAVI * NDVI MaskValid_VSF = MaskValid_MSAVI & MaskValid_NDVI & MaskValid_Obs VSF_Norm = minmax_norm(VSF, MaskValid_VSF, fillV) # Soil Suppressing Factor (SSF). # Derive the Modified Bare soil Index (MBI). MBI = (SWIR1 - SWIR2 - NIR) / (SWIR1 + SWIR2 + NIR) + 0.5 MBI, MaskValid_MBI = hist_cut(MBI, MaskValid_Obs, fillV, 6, [-0.5, 1.5]) # Deriving Enhanced-MBI based on MBI and MNDWI. MNDWI = (Green - SWIR1) / (Green + SWIR1) + 0.000001 MNDWI, MaskValid_MNDWI = hist_cut(MNDWI, MaskValid_Obs, fillV, 6, [-1, 1]) EMBI = ((MBI + 0.5) - (MNDWI + 1)) / ((MBI + 0.5) + (MNDWI + 1)) EMBI, MaskValid_EMBI = hist_cut(EMBI, MaskValid_Obs, fillV, 6, [-1, 1]) # Derive SSF. SSF = (1 - EMBI) MaskValid_SSF = MaskValid_MBI & MaskValid_MNDWI & MaskValid_EMBI & MaskValid_Obs SSF_Norm = minmax_norm(SSF, MaskValid_SSF, fillV) # Modulation Factor (MF). MF = (Blue + Green - NIR - SWIR1) / (Blue + Green + NIR + SWIR1) + 0.000001 MF, MaskValid_MF = hist_cut(MF, MaskValid_Obs, fillV, 6, [-1, 1]) MaskValid_MF_U = MaskValid_MF & MaskValid_Obs MF_Norm = minmax_norm(MF, MaskValid_MF_U, fillV) # Derive Artificial Surface Index (ASI). ASI = AF_Norm * SSF_Norm * VSF_Norm * MF_Norm MaskValid_ASI = MaskValid_AF_U & MaskValid_VSF & MaskValid_SSF & MaskValid_MF_U & MaskValid_Obs ASI[~MaskValid_ASI] = fillV return ASI def stack_kwcoco(coco_fpath, out_dir, sensors, adj_cloud, method, pman=None, workers=0, resolution=None): """ Args: coco_fpath (str | PathLike | CocoDataset): the kwcoco dataset to convert out_dir (str | PathLike): path to write the data Returns: List[Dict]: a list of dictionary result objects Example: >>> # xdoctest: +SKIP >>> # TODO: readd this doctest >>> from pycold.imagetool.prepare_kwcoco import * # NOQA >>> setup_logging() >>> coco_dset = geowatch.coerce_kwcoco('geowatch-msi') >>> coco_fpath = coco_dset.fpath >>> #coco_fpath = grab_demo_kwcoco_dataset() >>> dpath = ub.Path.appdir('pycold/tests/stack_kwcoco').ensuredir() >>> out_dir = dpath / 'stacked' >>> results = stack_kwcoco(coco_fpath, out_dir) """ # TODO: configure out_dir = ub.Path(out_dir) # Load the kwcoco dataset dset = kwcoco.CocoDataset.coerce(coco_fpath) # Get all images ids sorted in temporal order per video all_images = dset.images(list(ub.flatten(dset.videos().images))) flags = [s in sensors for s in all_images.lookup('sensor_coarse')] all_images = all_images.compress(flags) if len(all_images) == 0: raise Exception('No images!') image_id_iter = iter(all_images) jobs = ub.JobPool(mode='process', max_workers=workers) if pman is not None: image_id_iter = pman.progiter(image_id_iter, desc='submit prepare jobs', transient=True) for image_id in image_id_iter: coco_image: kwcoco.CocoImage = dset.coco_image(image_id) coco_image = coco_image.detach() # Transform the image data into the desired block structure. jobs.submit(process_one_coco_image, coco_image, out_dir, adj_cloud, method, resolution) job_iter = jobs.as_completed() if pman is not None: job_iter = pman.progiter(job_iter, desc='collect prepare jobs') for job in job_iter: meta_fpath = job.result() return meta_fpath def stack_kwcoco_PD(coco_fpath, out_dir, sensors, adj_cloud, method, pman=None, workers=0, resolution=None): """ Args: coco_fpath (str | PathLike | CocoDataset): the kwcoco dataset to convert out_dir (str | PathLike): path to write the data Returns: List[Dict]: a list of dictionary result objects Example: >>> # xdoctest: +SKIP >>> # TODO: readd this doctest >>> from pycold.imagetool.prepare_kwcoco import * # NOQA >>> setup_logging() >>> coco_dset = geowatch.coerce_kwcoco('geowatch-msi') >>> coco_fpath = coco_dset.fpath >>> #coco_fpath = grab_demo_kwcoco_dataset() >>> dpath = ub.Path.appdir('pycold/tests/stack_kwcoco').ensuredir() >>> out_dir = dpath / 'stacked' >>> results = stack_kwcoco(coco_fpath, out_dir) """ # TODO: configure out_dir = ub.Path(out_dir) # Load the kwcoco dataset dset = kwcoco.CocoDataset.coerce(coco_fpath) # Get all images ids sorted in temporal order per video all_images = dset.images(list(ub.flatten(dset.videos().images))) flags = [s in sensors for s in all_images.lookup('sensor_coarse')] all_images = all_images.compress(flags) if len(all_images) == 0: raise Exception('No images!') image_id_iter = iter(all_images) jobs = ub.JobPool(mode='process', max_workers=workers) if pman is not None: image_id_iter = pman.progiter(image_id_iter, desc='submit prepare jobs', transient=True) for image_id in image_id_iter: coco_image: kwcoco.CocoImage = dset.coco_image(image_id) coco_image = coco_image.detach() # Transform the image data into the desired block structure. jobs.submit(process_one_coco_image, coco_image, out_dir, adj_cloud, method, resolution) job_iter = jobs.as_completed() if pman is not None: job_iter = pman.progiter(job_iter, desc='collect prepare jobs') for job in job_iter: meta_fpath = job.result() return meta_fpath @profile def process_one_coco_image(coco_image, out_dir, adj_cloud, method, resolution): """ Args: coco_image (kwcoco.CocoImage): the image to process out_dir (Path): path to write the image data resolution (str | None): resolution to process at (e.g. 30GSD). Returns: Dict: result dictionary with keys: status (str) : either a string passed or failed fpaths (List[str]): a list of files that were written """ n_block_x = 20 n_block_y = 20 is_partition = True # hard coded # Use the COCO name as a unique filename id. image_name = coco_image.img.get('name', None) video_name = coco_image.video.get('name', None) if image_name is None: image_name = 'img_{:06d}'.format(coco_image.img['id']) if video_name is None: video_name = 'vid_{:06d}'.format(coco_image.video['id']) # Preconstruct the file names used in the inner loops fname_json = (image_name + '.json') fname_npy = (image_name + '.npy') video_dpath = (out_dir / video_name).ensuredir() # Other relevant coco metadata date_captured = coco_image.img['date_captured'] ordinal_date = datetime_cls.strptime( date_captured[:10], '%Y-%m-%d').toordinal() # frame_index = coco_image.img['frame_index'] n_cols = coco_image.img['width'] n_rows = coco_image.img['height'] # Determine what sensor the image is from. # Note: if kwcoco needs to register more fine-grained sensor # information we can do that. sensor = coco_image.img['sensor_coarse'] # assert sensor == 'L8', 'MWE only supports landsat-8 for now' # Given the sensor, determine what the intensity and quality band # we should request are. sensor_info = SENSOR_TO_INFO[sensor] intensity_channels = sensor_info['intensity_channels'] quality_channels = sensor_info['quality_channels'] quality_interpretation = sensor_info['quality_interpretation'] quality_bits = QA_INTERPRETATIONS[quality_interpretation] # Specify how we are going to handle spatial resampling and nodata delay_kwargs = { 'space': 'video', 'resolution': resolution, } # FIXME: # We need to lookup the correct quality interpretation here HACK_QA = 1 if HACK_QA: # TODO: use # from geowatch.tasks.fusion.datamodules import qa_bands # if 'qa_pixel' in coco_image.channels: # quality_channels = 'qa_pixel' ... # Construct delayed images. These represent a tree of image # operations that will resample the image at the desired resolution # as well as align it with other images in the sequence. # NOTE: Issue occurs when setting resolution argument in imdelay delayed_im = coco_image.imdelay(channels=intensity_channels, nodata_method=None, **delay_kwargs) # FIXME: # nodata_method=ma should returned a masked images instead of nans when # underlying data doesn't exist. Will be fixed in next kwcoco / delayed # image delayed_qa = coco_image.imdelay(channels=quality_channels, nodata_method=None, **delay_kwargs) # Check what shape the data would be loaded with if we finalized right now. h, w = delayed_im.shape[0:2] # Determine if padding is necessary to properly break the data into blocks. padded_w = int(np.ceil(w / n_block_x) * n_block_x) padded_h = int(np.ceil(h / n_block_y) * n_block_y) if padded_w != w or padded_h != h: # cropping using an oversized slice with clip=False and wrap=False is # equivalent to padding. In the future a more efficient pad operation # where the padding value can be specified will be added, but this will # work well enough for now. slice_ = (slice(0, padded_h), slice(0, padded_w)) delayed_im = delayed_im.crop(slice_, clip=False, wrap=False) delayed_qa = delayed_qa.crop(slice_, clip=False, wrap=False) # It is important that the categorical QA band is not interpolated or # antialiased, whereas the intensity bands should be. qa_data = delayed_qa.finalize(interpolation='nearest', antialias=False, optimize=False) # Decoding QA band if adj_cloud: qa_unpacked = qa_decoding(qa_data) else: qa_unpacked = qa_decoding_no_boundary(qa_data) # First check the quality bands before loading all of the image data. # FIXME: the quality bits in this example are wrong. # Setting the threshold to zero to bypass for now. clear_threshold = 0 if clear_threshold > 0: clear_bits = functools.reduce( operator.or_, ub.take(quality_bits, ['clear_land', 'clear_water'])) noobs_bits = functools.reduce( operator.or_, ub.take(quality_bits, ['no_observation'])) is_clear = (qa_data & clear_bits) > 0 is_noobs = (qa_data & noobs_bits) > 0 is_obs = ~is_noobs is_obs_clear = is_clear & is_obs clear_ratio = is_obs_clear.sum() / is_obs.sum() else: clear_ratio = 1 if clear_ratio <= clear_threshold: logger.warn('Not enough clear observations for {}/{}'.format( video_name, image_name)) return im_data = delayed_im.finalize(interpolation='cubic', antialias=True) # Zero padding affects margin block not to process COLD, leading missing npy in the second step. # To avoid this issue, filled with fake data in the padding area. # This will fix predict script running forever without raising error. # This issue will be addressed in export_cold_result_kwcoco.py, not here # padding_value_col = im_data[:, :padded_w - w] # im_data[:, w:] = padding_value_col # padding_value_row = im_data[:padded_h - h, :] # im_data[h:] = padding_value_row if method == 'ASI': Scale = 10000 fill_value = 0 B1 = im_data[:, :, 0] B2 = im_data[:, :, 1] B3 = im_data[:, :, 2] B4 = im_data[:, :, 3] B5 = im_data[:, :, 4] B6 = im_data[:, :, 5] MaskValid_Obs = ((B1 > 0) & (B1 < 1 * Scale) & (B2 > 0) & (B2 < 1 * Scale) & (B3 > 0) & (B3 < 1 * Scale) & (B4 > 0) & (B4 < 1 * Scale) & (B5 > 0) & (B5 < 1 * Scale) & (B6 > 0) & (B6 < 1 * Scale) ) # Calculating ASI ASI = artificial_surface_index( B1.astype( np.float32), B2.astype( np.float32), B3.astype( np.float32), B4.astype( np.float32), B5.astype( np.float32), B6.astype( np.float32), Scale, MaskValid_Obs, fill_value) # Get land mask. MNDWI = (B2 - B5) / (B2 + B5) MNDWI, MaskValid_MNDWI = hist_cut( MNDWI, MaskValid_Obs, fill_value, 6, [-1, 1]) # Water threshold for MNDWI (may need to be adjusted for different # study areas). Water_Th = 0 MaskLand = (MNDWI < Water_Th) # Convert dtype from float32 to int16 ASI = ASI * Scale ASI = ASI.astype('int16') ASI[ASI == 0] = fill_value # Exclude water pixels. ASI[~MaskLand] = fill_value ASI = ASI.reshape(ASI.shape[0], ASI.shape[1], 1) false_band = np.full((ASI.shape[0], ASI.shape[1], 1), 0) # input for Hybrid-COLD (with ASI) = B2, B3, B4, B5, B6, ASI data = np.concatenate( [im_data[:, :, 1:6], ASI, false_band, qa_unpacked], axis=2) else: data = np.concatenate([im_data, qa_unpacked], axis=2) result_fpaths = [] metadata = { 'image_name': image_name, 'date_captured': date_captured, 'ordinal_date': ordinal_date, 'region_id': video_name, 'n_cols': n_cols, 'n_rows': n_rows, 'video_w': w, 'video_h': h, 'padded_n_cols': padded_w, 'padded_n_rows': padded_h, 'n_block_x': n_block_x, 'n_block_y': n_block_y, 'adj_cloud': adj_cloud, 'method': method, 'sensor': sensor, } if is_partition: bw = int(padded_w / n_block_x) # width of a block bh = int(padded_h / n_block_y) # height of a block # Use einops to rearrange the data into blocks # Question: would using numpy strided tricks be faster? blocks = einops.rearrange( data, '(nby bh) (nbx bw) c -> nbx nby bh bw c', bw=bw, bh=bh) # FIXME: Disable skipping until QA bands are handled correctly SKIP_BLOCKS_WITH_QA = False for i, j in it.product(range(n_block_y), range(n_block_x)): block = blocks[i, j] bh, bw = block.shape[0:2] if SKIP_BLOCKS_WITH_QA: # check if no valid pixels in the chip, then eliminate qa_unique = np.unique(block[..., -1]) qa_unique # skip blocks are all cloud, shadow or filled values in DHTC, # we also don't need to save pixel that has qa value of # 'QA_CLOUD', 'QA_SHADOW', or FILLED value (255) if ... and False: continue block_dname = 'block_x{}_y{}'.format(i + 1, j + 1) block_dpath = video_dpath / block_dname block_fpath = block_dpath / fname_npy metadata.update({ 'x': i + 1, 'y': j + 1, 'total_pixels': bw * bh, 'total_bands': int(block.shape[-1]), }) if not block_fpath.exists(): block_dpath.mkdir(exist_ok=True) meta_fpath = block_dpath / fname_json meta_fpath.write_text(json.dumps(metadata)) np.save(block_fpath, block) result_fpaths.append(block_fpath) result_fpaths.append(meta_fpath) logger.info( 'Stacked blocked image {}/{}'.format(video_name, image_name)) else: # FIXME: seems broken. Comment on why and raise a NotImplementedError # if this is true. metadata.update({ 'total_pixels': int(np.prod(data.shape[0:2])), 'total_bands': int(data.shape[-1]), }) full_fpath = video_dpath / fname_npy if not block_fpath.exists(): meta_fpath = video_dpath / fname_json meta_fpath.write_text(json.dumps(metadata)) np.save(full_fpath, data) result_fpaths.append(full_fpath) result_fpaths.append(meta_fpath) logger.info( 'Stacked full image {}/{}'.format(video_name, image_name)) # FIXME: It seems strange that we return one of the metadata paths for just # one of the blocks instead of having a metadata file for all blocks. meta_fpath = block_dpath / (image_name + '.json') return meta_fpath if __name__ == '__main__': prepare_kwcoco_main()