根据XML文件划分数据集

根据XML标注文件划分数据集:将全部文件随机、近似平均地划分为5份,并把每一份中的文件名(不含扩展名)分别写入 data1.txt ~ data5.txt。

import os
import random

# Fraction of the still-unassigned files that is carried forward after each
# part is peeled off.  Keeping 4/5, then 3/4, then ~2/3, then 1/2 of what
# remains leaves five parts of roughly equal size (0.667 approximates 2/3).
KEEP_FRACTIONS = (0.8, 0.75, 0.667, 0.5)

# Directory holding the XML annotation files.
xmlfilepath = 'F:/VOC/Annotations'
# Directory that receives data1.txt ... data5.txt (must already exist).
txtsavepath = 'C:/Users/123/Desktop/1'


def list_xml_stems(xml_dir):
    """Return the extension-less names of the ``.xml`` entries in *xml_dir*.

    Entries whose extension is not ``.xml`` (compared case-insensitively),
    including sub-directories without that suffix, are skipped instead of
    having their last four characters chopped off.  The result is sorted so
    that a fixed seed yields the same split regardless of the order in which
    the operating system lists the directory.
    """
    stems = []
    for entry in sorted(os.listdir(xml_dir)):
        stem, extension = os.path.splitext(entry)
        if extension.lower() == '.xml':
            stems.append(stem)
    return stems


def split_names(names, keep_fractions=KEEP_FRACTIONS, seed=None):
    """Randomly partition *names* into ``len(keep_fractions) + 1`` parts.

    For every fraction, ``int(remaining * fraction)`` items are carried
    forward and the rest of the remaining items form the next part; whatever
    is left after the last fraction becomes the final part.

    Args:
        names: sequence of items to partition.
        keep_fractions: fraction of the remaining items kept after each part.
        seed: optional seed for a private ``random.Random``; ``None`` gives a
            different split on every call.

    Returns:
        A list of lists.  Every input item appears in exactly one part, and
        each part keeps the relative order the items had in *names*.
    """
    order = list(range(len(names)))
    random.Random(seed).shuffle(order)

    parts = []
    start = 0
    remaining = len(order)
    for fraction in keep_fractions:
        kept = int(remaining * fraction)
        size = remaining - kept
        parts.append(order[start:start + size])
        start += size
        remaining = kept
    parts.append(order[start:])

    # Sorting the indices restores the input order inside each part.
    return [[names[i] for i in sorted(part)] for part in parts]


def write_splits(xml_dir, out_dir, keep_fractions=KEEP_FRACTIONS, seed=None):
    """Split the XML files of *xml_dir* and write one name list per part.

    Part ``k`` (1-based) is written to ``<out_dir>/data<k>.txt`` with one
    extension-less file name per line.

    Args:
        xml_dir: directory containing the ``.xml`` annotation files.
        out_dir: existing directory that receives the ``data<k>.txt`` files.
        keep_fractions: forwarded to :func:`split_names`.
        seed: forwarded to :func:`split_names`.

    Returns:
        The list of parts that was written.
    """
    parts = split_names(list_xml_stems(xml_dir), keep_fractions, seed)
    for index, part in enumerate(parts, start=1):
        target = os.path.join(out_dir, 'data%d.txt' % index)
        # ``with`` closes the file even if a write fails part-way through.
        with open(target, 'w') as handle:
            handle.writelines(name + '\n' for name in part)
    return parts


if __name__ == '__main__':
    write_splits(xmlfilepath, txtsavepath)

版权声明:本文为awdfggg原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。