import datasets
import daft
# Set Daft execution options up front, before any DataFrame work happens.
daft.set_execution_config(
    enable_native_executor=True,
    default_morsel_size=256,
)

# Load the Fashionpedia detection dataset through Hugging Face `datasets`.
ds = datasets.load_dataset("detection-datasets/fashionpedia")

# Number of classes of the "category" label nested under the "objects" feature.
fashionpedia_num_classes = (
    ds["train"].features["objects"].feature["category"].num_classes
)

# Expression that pulls the "bytes" field out of the "image" struct column
# and decodes it into a Daft image.
hf_img_to_daft_img = daft.col("image").struct.get("bytes").image.decode()

# Build the training DataFrame from the `[:1000]` slice of the train split's
# Arrow table, replacing the "image" column with its decoded form.
df_train = daft.from_arrow(ds["train"].data.table[:1000]).with_column(
    "image", hf_img_to_daft_img
)
df_train.show(2)

| image_id Int64 | image Image[MIXED] | width Int64 | height Int64 | objects Struct[bbox_id: List[Int64], category: List[Int64], bbox: List[FixedSizeList[Float64; 4]], area: List[Int64]] |
|---|---|---|---|---|
| 23 | | 682 | 1024 | {bbox_id: [150311, 150312, 150313, 150314], category: [23, 23, 33, 10], bbox: [[445, 910, 505, 983], [239, 940, 284, 994], [298, 282, 386, 352], [210, 282, 448, 665]], area: [1422, 843, 373, 56375], } |
| 25 | | 683 | 1024 | {bbox_id: [158953, 158954, 158955, 158956, 158957, 158958, 158959, 158960, 158961, 158962], category: [2, 33, 31, 31, 13, 7, 22, 22, 23, 23], bbox: [[182, 220, 472, 647], [294, 221, 407, 257], [405, 297, 472, 647], [182, 264, 266, 621], [284, 135, 372, 169], [238, 537, 414, 606], [351, 732, 417, 922], [202, 749, 270, 930], [200, 921, 256, 979], [373, 903, 455, 966]], area: [87267, 1220, 16895, 18541, 1468, 9360, 8629, 8270, 2717, 3121], } |