深度学习从入门到精通——VOC 2012数据读取(pytorch)-天翼云

深度学习从入门到精通——VOC 2012数据读取(pytorch)

2024-11-06 07:12:42 阅读次数：30

from torch.utils.data import Dataset
import os
import torch
import json
from PIL import Image
from lxml import etree


class VOC2012DataSet(Dataset):
    """读取解析PASCAL VOC2012数据集"""

    def __init__(self, voc_root, transforms, txt_name: str = "train.txt"):
        self.root = os.path.join(voc_root, "VOCdevkit", "VOC2012")
        self.img_root = os.path.join(self.root, "JPEGImages")
        self.annotations_root = os.path.join(self.root, "Annotations")

        # read train.txt or val.txt file
        txt_path = os.path.join(self.root, "ImageSets", "Main", txt_name)
        assert os.path.exists(txt_path), "not found {} file.".format(txt_name)

        with open(txt_path) as read:
            self.xml_list = [os.path.join(self.annotations_root, line.strip() + ".xml")
                             for line in read.readlines()]

        # check file
        assert len(self.xml_list) > 0, "in '{}' file does not find any information.".format(txt_path)
        for xml_path in self.xml_list:
            assert os.path.exists(xml_path), "not found '{}' file.".format(xml_path)

        # read class_indict
        try:
            json_file = open('./pascal_voc_classes.json', 'r')
            self.class_dict = json.load(json_file)
        except Exception as e:
            print(e)
            exit(-1)

        self.transforms = transforms

    def __len__(self):
        return len(self.xml_list)

    def __getitem__(self, idx):
        # read xml
        xml_path = self.xml_list[idx]
        with open(xml_path) as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = self.parse_xml_to_dict(xml)["annotation"]
        img_path = os.path.join(self.img_root, data["filename"])
        image = Image.open(img_path)
        if image.format != "JPEG":
            raise ValueError("Image format not JPEG")
        boxes = []
        labels = []
        iscrowd = []
        for obj in data["object"]:
            xmin = float(obj["bndbox"]["xmin"])
            xmax = float(obj["bndbox"]["xmax"])
            ymin = float(obj["bndbox"]["ymin"])
            ymax = float(obj["bndbox"]["ymax"])
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.class_dict[obj["name"]])
            iscrowd.append(int(obj["difficult"]))

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=64)
        iscrowd = torch.as_tensor(iscrowd, dtype=64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def get_height_and_width(self, idx):
        # read xml
        xml_path = self.xml_list[idx]
        with open(xml_path) as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = self.parse_xml_to_dict(xml)["annotation"]
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        return data_height, data_width

    def parse_xml_to_dict(self, xml):
        """
        将xml文件解析成字典形式，参考tensorflow的recursive_parse_xml_to_dict
        Args:
            xml: xml tree obtained by parsing XML file contents using lxml.etree

        Returns:
            Python dictionary holding XML contents.
        """

        if len(xml) == 0:  # 遍历到底层，直接返回tag对应的信息
            return {xml.tag: xml.text}

        result = {}
        for child in xml:
            child_result = self.parse_xml_to_dict(child)  # 递归遍历标签信息
            if child.tag != 'object':
                result[child.tag] = child_result[child.tag]
            else:
                if child.tag not in result:  # 因为object可能有多个，所以需要放入列表里
                    result[child.tag] = []
                result[child.tag].append(child_result[child.tag])
        return {xml.tag: result}

    def coco_index(self, idx):
        """
        该方法是专门为pycocotools统计标签信息准备，不对图像和标签作任何处理
        由于不用去读取图片，可大幅缩减统计时间

        Args:
            idx: 输入需要获取图像的索引
        """
        # read xml
        xml_path = self.xml_list[idx]
        with open(xml_path) as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = self.parse_xml_to_dict(xml)["annotation"]
        data_height = int(data["size"]["height"])
        data_width = int(data["size"]["width"])
        # img_path = os.path.join(self.img_root, data["filename"])
        # image = Image.open(img_path)
        # if image.format != "JPEG":
        #     raise ValueError("Image format not JPEG")
        boxes = []
        labels = []
        iscrowd = []
        for obj in data["object"]:
            xmin = float(obj["bndbox"]["xmin"])
            xmax = float(obj["bndbox"]["xmax"])
            ymin = float(obj["bndbox"]["ymin"])
            ymax = float(obj["bndbox"]["ymax"])
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.class_dict[obj["name"]])
            iscrowd.append(int(obj["difficult"]))

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=64)
        iscrowd = torch.as_tensor(iscrowd, dtype=64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        return (data_height, data_width), target

    @staticmethod
    def collate_fn(batch):
        return tuple(zip(*batch))

if __name__ == '__main__':
    data = VOC2012DataSet(r"D:/",transforms=None)
    print(data[0][1]["boxes"])

深度学习从入门到精通——VOC 2012数据读取(pytorch)

活动

智算服务

应用商城

合作伙伴

开发者

支持与服务

了解天翼云

深度学习从入门到精通——VOC 2012数据读取(pytorch)

深度学习从入门到精通——VOC 2012数据读取(pytorch)

相关文章

【30天玩转python】文件操作

零基础玩转C语言系列第三章——循环语句

Redis经典问题：数据并发竞争

理解 Linux 文件结构：一份简单易懂的入门教程

JavaI/O编程---File文件操作

C#读取shp的属性表dbf文件

从csv文件读取或者创建字典类型dict读取时间每列年月日时分秒信息合并成一列里显示2025-01-04 10:45:15

1.Matlab图像的读取和显示

【shell基础（12）循环之while、select】while：管道、重定向循环读取文件；select：菜单拓展循环

java读取shapefile且用arcgis for js展示

作者介绍

最新文章

【epoll】epoll的水平触发和边沿触发，及为什么边沿触发必须使用非阻塞？

Paimon 是什么？Apache Paimon简介

【PyTorch】 torch.flatten()与nn.Flatten()的区别

pytorch 构造读取数据的工具类 Dataset 与 DataLoader （pytorch Data学习一）

pytorch 保存、读取 tensor 数据

pytorch修改tensor数据类型

热门文章

pytorch 保存、读取 tensor 数据

pytorch 构造读取数据的工具类 Dataset 与 DataLoader （pytorch Data学习一）

【PyTorch】 torch.flatten()与nn.Flatten()的区别

解决pytorch DataLoader 加载数据报错UnicodeDecodeError: ‘utf-8‘ codec can‘t decode byte 0xe5 in position 1023

pytorch修改tensor数据类型

Paimon 是什么？Apache Paimon简介

热门标签

相关产品

弹性云主机

天翼云电脑（公众版）

对象存储

云硬盘

随机文章

解决pytorch DataLoader 加载数据报错UnicodeDecodeError: ‘utf-8‘ codec can‘t decode byte 0xe5 in position 1023

pytorch修改tensor数据类型

Paimon 是什么？Apache Paimon简介

pytorch 构造读取数据的工具类 Dataset 与 DataLoader （pytorch Data学习一）

【PyTorch】 torch.flatten()与nn.Flatten()的区别

【epoll】epoll的水平触发和边沿触发，及为什么边沿触发必须使用非阻塞？