其他分享
首页 > 其他分享> > 划分训练集和测试集和验证集

划分训练集和测试集和验证集

作者:互联网

划分训练集和测试集和验证集:

import os
import codecs
import random
random.seed(1229)

data = []
with codecs.open('neg.txt', "r", encoding='utf-8', errors='ignore') as fdata:
    now = fdata.readlines()
    data.append(['0 ' + item for item in now])
with codecs.open('pos.txt', "r", encoding='utf-8', errors='ignore') as fdata:
    now = fdata.readlines()
    data.append(['1 ' + item for item in now])

def get_test(data, n, x):
    st, ed = len(data) * x // n, len(data) * (x+1) // n
    return data[st:ed]

def get_train(data, n, x):
    st, ed = len(data) * x // n, len(data) * (x+1) // n
    return data[:st] + data[ed:]

for i in range(10):
    train_ori = [get_train(item, 10, i) for item in data]
    test_ori = [get_test(item, 10, i) for item in data]
    
    train = []
    dev = []
    test = []
    for j in range(2):
        random.shuffle(train_ori[j])
        x = len(train_ori[j]) * 9 // 10
        train += train_ori[j][:x]
        dev += train_ori[j][x:]
        test += test_ori[j]
    random.shuffle(train)
    random.shuffle(dev)
    random.shuffle(test)
    os.system('mkdir mr%s' % i)
    open('mr%s/train.txt' % i, 'w').writelines(train)
    open('mr%s/dev.txt' % i, 'w').writelines(dev)
    open('mr%s/test.txt' % i, 'w').writelines(test)

标签:训练,验证,random,item,train,测试,ori,test,data
来源: https://blog.51cto.com/u_14540820/2759522