干货:Python 划分数据集

import os
import random
import shutil
import time

def copyFile(fileDir, class_name, rate=None, dest_dirs=None):
    """Randomly split the files of one class folder into train/test copies.

    The regular files directly inside ``fileDir`` are split at random:
    ``int(count * rate)`` of them form the training sample and the
    remaining ones form the test sample. Each sample is copied into
    ``<destination root>/<class_name>/``, which is created when missing.

    Args:
        fileDir: Directory holding the files of a single class.
        class_name: Sub-folder name created under every destination root.
        rate: Fraction of the files placed in the training sample. When
            omitted, the module-level ``train_rate`` is used.
        dest_dirs: Two destination roots, ``[train_root, test_root]``.
            When omitted, the module-level ``save_dir`` is used.

    Raises:
        NameError: If ``rate`` or ``dest_dirs`` is omitted and the matching
            module-level variable has not been defined.
    """
    # Fall back to the script-level configuration so that existing
    # two-argument calls keep working unchanged.
    if rate is None:
        rate = train_rate
    if dest_dirs is None:
        dest_dirs = save_dir

    # Keep regular files only: shutil.copy cannot copy a directory, so a
    # nested folder would abort the split half-way through.
    image_list = [
        name for name in os.listdir(fileDir)
        if os.path.isfile(os.path.join(fileDir, name))
    ]
    train_number = int(len(image_list) * rate)
    train_sample = random.sample(image_list, train_number)

    # Everything not drawn for training goes to the test sample, kept in
    # listing order.
    chosen = set(train_sample)
    test_sample = [name for name in image_list if name not in chosen]

    # Copy each sample into its own <root>/<class_name> folder.
    for dest_root, names in zip(dest_dirs, (train_sample, test_sample)):
        target_dir = os.path.join(dest_root, class_name)
        os.makedirs(target_dir, exist_ok=True)
        for name in names:
            shutil.copy(os.path.join(fileDir, name),
                        os.path.join(target_dir, name))
if __name__ == '__main__':
    # Script entry point: split every class folder under the source dataset
    # into train/test copies and report the elapsed time.
    # The guard must compare against '__main__'; with any other string the
    # body never executes when the file is run as a script.
    time_start = time.time()

    # Root of the original dataset (one sub-folder per class).
    origion_path = './data/a256'

    # Destination roots; copyFile reads this list as a module-level global.
    save_train_dir = './data3/train'
    save_test_dir = './data3/test'
    save_dir = [save_train_dir, save_test_dir]

    # Fraction of each class copied to the training set; also read by
    # copyFile as a module-level global.
    train_rate = 0.8

    # Every sub-folder of the dataset root is treated as one class.
    file_list = os.listdir(origion_path)

    for class_name in file_list:
        image_Dir = os.path.join(origion_path, class_name)
        if not os.path.isdir(image_Dir):
            # Stray files in the dataset root are not classes; listing
            # them as directories would raise.
            continue
        copyFile(image_Dir, class_name)
        print('%s划分完毕!' % class_name)

    time_end = time.time()
    print('---------------')
    print('训练集和测试集划分共耗时%s!' % (time_end - time_start))


参考了别人的博客(我没有保存链接,博主名字可能叫李明),修改后得以运行。现在代码格式应该不乱了吧。


版权声明:本文为qq_42037273原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。