import datasets
import daft

daft.set_execution_config(enable_native_executor=True, default_morsel_size=256)
ds = datasets.load_dataset("detection-datasets/fashionpedia")
df_train = daft.from_arrow(ds["train"].data.table[:1000])

fashionpedia_num_classes = ds["train"].features["objects"].feature["category"].num_classes
hf_img_to_daft_img = daft.col("image").struct.get("bytes").image.decode()

df_train = df_train.with_column(
    "image", hf_img_to_daft_img
)
df_train.show(2)
Columns: image_id (Int64), image (Image[MIXED]), width (Int64), height (Int64), objects (Struct[bbox_id: List[Int64], category: List[Int64], bbox: List[FixedSizeList[Float64; 4]], area: List[Int64]])

Row 1: image_id=23, image=<Image>, width=682, height=1024,
  objects={bbox_id: [150311, 150312, 150313, 150314],
           category: [23, 23, 33, 10],
           bbox: [[445, 910, 505, 983], [239, 940, 284, 994], [298, 282, 386, 352], [210, 282, 448, 665]],
           area: [1422, 843, 373, 56375]}

Row 2: image_id=25, image=<Image>, width=683, height=1024,
  objects={bbox_id: [158953, 158954, 158955, 158956, 158957, 158958, 158959, 158960, 158961, 158962],
           category: [2, 33, 31, 31, 13, 7, 22, 22, 23, 23],
           bbox: [[182, 220, 472, 647], [294, 221, 407, 257], [405, 297, 472, 647], [182, 264, 266, 621], [284, 135, 372, 169], [238, 537, 414, 606], [351, 732, 417, 922], [202, 749, 270, 930], [200, 921, 256, 979], [373, 903, 455, 966]],
           area: [87267, 1220, 16895, 18541, 1468, 9360, 8629, 8270, 2717, 3121]}

(Showing first 2 rows)
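The category values inside objects are integer class ids. Since fashionpedia_num_classes was pulled from the Hugging Face ClassLabel feature above, the same feature can map ids back to readable names; a small illustrative lookup, assuming the standard ClassLabel API:

category_feature = ds["train"].features["objects"].feature["category"]
category_feature.num_classes   # total number of Fashionpedia categories
category_feature.int2str(23)   # human-readable name for class id 23 (illustrative lookup)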
import torch


def apply_torch_transform(bboxes):
    bboxes = torch.tensor(bboxes, dtype=torch.float32)
    return bboxes


df_train = df_train.with_columns(
    {
        "bboxes": daft.col("objects")
        .struct.get("bbox")
        .apply(
            apply_torch_transform,
            return_dtype=daft.DataType.tensor(daft.DataType.float32()),
        ),
        "img_tensor": daft.col("image").cast(daft.DataType.tensor(dtype=daft.DataType.uint8()))
    }
)
df_train.show(2)
Columns: image_id (Int64), image (Image[MIXED]), width (Int64), height (Int64), objects (Struct[bbox_id: List[Int64], category: List[Int64], bbox: List[FixedSizeList[Float64; 4]], area: List[Int64]]), bboxes (Tensor(Float32)), img_tensor (Tensor(UInt8))

Row 1: image_id=23, image=<Image>, width=682, height=1024,
  objects={bbox_id: [150311, 150312, 150313, 150314],
           category: [23, 23, 33, 10],
           bbox: [[445, 910, 505, 983], [239, 940, 284, 994], [298, 282, 386, 352], [210, 282, 448, 665]],
           area: [1422, 843, 373, 56375]},
  bboxes=<Tensor shape=(4, 4)>, img_tensor=<Tensor shape=(1024, 682, 3)>

Row 2: image_id=25, image=<Image>, width=683, height=1024,
  objects={bbox_id: [158953, 158954, 158955, 158956, 158957, 158958, 158959, 158960, 158961, 158962],
           category: [2, 33, 31, 31, 13, 7, 22, 22, 23, 23],
           bbox: [[182, 220, 472, 647], [294, 221, 407, 257], [405, 297, 472, 647], [182, 264, 266, 621], [284, 135, 372, 169], [238, 537, 414, 606], [351, 732, 417, 922], [202, 749, 270, 930], [200, 921, 256, 979], [373, 903, 455, 966]],
           area: [87267, 1220, 16895, 18541, 1468, 9360, 8629, 8270, 2717, 3121]},
  bboxes=<Tensor shape=(10, 4)>, img_tensor=<Tensor shape=(1024, 683, 3)>

(Showing first 2 rows)
sample = df_train[["bboxes", "img_tensor"]].limit(1).to_pydict()
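to_pydict() materializes the selection into a plain {column_name: list_of_row_values} dict, so the single row selected here sits at index 0 of each list; roughly:

img0 = sample["img_tensor"][0]   # decoded HWC uint8 image for that row
bbs0 = sample["bboxes"][0]       # its (num_boxes, 4) float32 boxes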
import albumentations as A

transforms = A.Compose([
    A.RandomResizedCrop(size=(224, 224), antialias=True),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
],
bbox_params=A.BboxParams(format="pascal_voc", label_fields=["category_id"]))
transforms(image=sample["img_tensor"][0], bboxes=sample["bboxes"][0])
/var/folders/mz/q18tmv191p571kqg_6sn8skh0000gn/T/ipykernel_72438/1286868144.py:4: UserWarning: Argument 'antialias' is not valid and will be ignored.
  A.RandomResizedCrop(size=(224, 224), antialias=True),
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[31], line 8
      1 import albumentations as A
      3 transforms = A.Compose([
      4     A.RandomResizedCrop(size=(224, 224), antialias=True),
      5     A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
      6 ],
      7 bbox_params=A.BboxParams(format="pascal_voc", label_fields=["category_id"]))
----> 8 transforms(image=sample["img_tensor"][0], bboxes=sample["bboxes"][0])

File ~/git/londogard/code/data_loading/.pixi/envs/default/lib/python3.12/site-packages/albumentations/core/composition.py:334, in Compose.__call__(self, force_apply, *args, **data)
    331 if not need_to_run:
    332     return data
--> 334 self.preprocess(data)
    336 for t in self.transforms:
    337     data = t(**data)

File ~/git/londogard/code/data_loading/.pixi/envs/default/lib/python3.12/site-packages/albumentations/core/composition.py:366, in Compose.preprocess(self, data)
    364 if self.main_compose:
    365     for p in self.processors.values():
--> 366         p.ensure_data_valid(data)
    367     for p in self.processors.values():
    368         p.preprocess(data)

File ~/git/londogard/code/data_loading/.pixi/envs/default/lib/python3.12/site-packages/albumentations/core/bbox_utils.py:120, in BboxProcessor.ensure_data_valid(self, data)
    118 if self.params.label_fields and not all(i in data for i in self.params.label_fields):
    119     msg = "Your 'label_fields' are not valid - them must have same names as params in dict"
--> 120     raise ValueError(msg)

ValueError: Your 'label_fields' are not valid - them must have same names as params in dict
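The antialias argument is merely ignored (hence the UserWarning); the real failure is that the Compose declared bbox_params with label_fields=["category_id"], so every call must also pass a category_id sequence with one label per box. A minimal working call could look like this (the zero labels are placeholders for illustration; the real labels live in objects.category):

out = transforms(
    image=sample["img_tensor"][0],
    bboxes=sample["bboxes"][0],
    category_id=[0] * len(sample["bboxes"][0]),  # placeholder labels, one per box
)
out["image"]  # 224x224x3 float32 array after RandomResizedCrop + Normalize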

import numpy as np
from daft import DataType as dt

@daft.udf(return_dtype=dt.tensor(dt.float32()))
def transform(images, bboxes, categories):
    # Daft hands a UDF one Series per column; run the albumentations pipeline row by row.
    # The per-box category labels are passed through to satisfy label_fields=["category_id"].
    out = []
    for img, bbs, cats in zip(images.to_pylist(), bboxes.to_pylist(), categories.to_pylist()):
        augmented = transforms(image=np.asarray(img, dtype=np.uint8), bboxes=bbs, category_id=cats)
        out.append(augmented["image"])
    return out

df_train = df_train.with_column(
    "img_transformed",
    transform(daft.col("img_tensor"), daft.col("bboxes"), daft.col("objects").struct.get("category")),
)
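To sanity-check the new column and hand the data to PyTorch, a quick sketch (assuming the fixed UDF above; to_torch_iter_dataset is Daft's bridge to a torch IterableDataset):

df_train.select("image_id", "img_transformed").show(2)

# variable-length bboxes would still need a custom collate_fn, so only the
# fixed-shape transformed images are selected here
torch_ds = df_train.select("img_transformed").to_torch_iter_dataset()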