I have time series data (daily counts of infectious individuals), and I am using a SimpleRNN from Keras to classify each of these epidemics into one of eight possible classes.
I used cross-validation to select the best number of hidden units and other hyperparameters. My problem is that when I train the chosen model multiple times with the same architecture, I get very different results. I'm guessing this is a problem with many local minima, but I'm not sure; it could just be a problem with training. How can I diagnose this? I've done the following:
- Removed the patience term (early stopping) - I thought I might not be training long enough, and that the wildly different classification errors were due to that.
- Changed my activation functions to ReLUs instead of the default tanh units - maybe there's some saturation going on.
- Changed my load_data function to scale the input data (which are integers); a sketch of that change follows this list.
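This is roughly what the scaling change looks like (a minimal sketch, assuming scikit-learn's MinMaxScaler, which is what load_data below uses; the toy array is just for illustration):

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# toy example: three "epidemics", five daily counts each
X_train = np.array([[0., 2., 5., 9., 4.],
                    [1., 1., 3., 2., 0.],
                    [0., 4., 8., 12., 6.]])

scaler = MinMaxScaler(feature_range=(0, 1))
X_train_scaled = scaler.fit_transform(X_train)  # fit the scaler on the training data only
# X_test_scaled = scaler.transform(X_test)      # reuse the fitted scaler on the test set

Note that MinMaxScaler scales each column (here, each time step) independently to [0, 1].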
Can anyone think of anything else that I could do to try to figure out why the same code is giving me different classification errors?
EDIT: More information - I'm using an RNN for an 8-class classification task. There are 200 observations per class in the test set. I obtain a confusion matrix for each of the 10 runs in the code below (rows are true classes, columns are predicted classes, as built by get_confusion_matrix_one_hot). Here are two of them:
189,0,0,0,0,11,0,0
0,197,0,0,0,3,0,0
0,0,199,0,0,1,0,0
1,0,0,195,0,0,0,4
0,0,0,0,200,0,0,0
26,0,0,0,0,174,0,0
0,0,0,0,0,0,200,0
0,0,0,0,0,0,5,195
That one's pretty good. It has an average classification error of 0.032.
(EDIT 2) Here is a plot of the training and validation accuracy for the well-performing run:
This one, however, is much worse (average classification error of 0.389):
192,0,0,0,0,8,0,0
0,181,0,0,0,19,0,0
44,3,106,39,3,5,0,0
0,0,3,168,29,0,0,0
0,7,0,0,193,0,0,0
62,2,0,0,0,136,0,0
32,33,49,35,31,20,0,0
27,20,48,46,37,20,0,2
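For reference, the average classification error quoted above is one minus the per-class accuracy (diagonal count over row total), averaged over the eight classes; this mirrors the loop near the end of the code below. A minimal numpy sketch (avg_classification_error is just an illustrative helper, not part of the script):

import numpy as np

def avg_classification_error(con_mat):
    # rows are true classes, columns are predicted classes;
    # per-class error = 1 - diagonal count / row total
    con_mat = np.asarray(con_mat, dtype=float)
    per_class_error = 1.0 - np.diag(con_mat) / con_mat.sum(axis=1)
    return per_class_error.mean()

# example: avg_classification_error(np.loadtxt('confusionmatrix_rnn_0.txt', delimiter=","))
# applied to the two matrices above, this gives ~0.032 and ~0.389 respectively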
(EDIT 2) Here is a plot of the training and validation accuracy for the badly performing run (looks awful):
(EDIT 2) New thought: could this be because of bad initial conditions (weight initializations) after all? Why else would some runs be good and some so bad?
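One way to test this would be to fix the random seeds so that every run starts from the same initial weights and the same train/validation split, and then check whether the run-to-run variance disappears. A minimal sketch, assuming the TensorFlow backend and the TF 1.x API (other backends seed differently); this would go at the top of the script, before any Keras objects are created:

import random
import numpy as np
import tensorflow as tf

SEED = 42                  # arbitrary fixed seed
random.seed(SEED)          # Python's built-in RNG
np.random.seed(SEED)       # numpy RNG (controls the train/validation split and fit() shuffling)
tf.set_random_seed(SEED)   # TensorFlow graph-level seed (weight initialization on the TF backend)

Even with the seeds fixed, multi-threading and GPU kernels can leave a little nondeterminism, but differences as large as the two runs above should disappear if initialization is really the cause.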
Here is the code (EDITED to show the last two changes in the bullet points):
#!/usr/bin/env python
from __future__ import print_function

import numpy as np
import pandas, math, sys, keras
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN
from keras.regularizers import l2
from keras.utils.np_utils import to_categorical
from sklearn.preprocessing import MinMaxScaler  # needed for the input scaling in load_data
def load_data(train_file, test_file):
    trainset = np.loadtxt(train_file, delimiter=",")
    # split into input (X) and output (Y) variables
    X = trainset[:, 0:(trainset.shape[1]-2)]
    Y = (trainset[:, trainset.shape[1]-1]).astype(int)
    # scale the integer counts to [0, 1]; fit the scaler on the training set only
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_scaled = scaler.fit_transform(X)
    y_binary = to_categorical(Y)
    testset = np.loadtxt(test_file, delimiter=",")
    X_test = testset[:, 0:(testset.shape[1]-2)]
    X_test_scaled = scaler.transform(X_test)
    Y_test = (testset[:, testset.shape[1]-1]).astype(int)
    # reshape to (samples, timesteps, 1) for the SimpleRNN
    X_train = np.reshape(X_scaled, (X_scaled.shape[0], X_scaled.shape[1], 1))
    X_test = np.reshape(X_test_scaled, (X_test_scaled.shape[0], X_test_scaled.shape[1], 1))
    ytest_binary = to_categorical(Y_test)
    return (X_train, y_binary, X_test, ytest_binary)
def get_confusion_matrix_one_hot(model_results, truth):
    '''model_results and truth should be in one-hot format, i.e. have >= 2 columns,
    where truth is 0/1, and the argmax along each row of model_results is the model's prediction
    '''
    assert model_results.shape == truth.shape
    num_outputs = truth.shape[1]
    confusion_matrix = np.zeros((num_outputs, num_outputs), dtype=np.int32)
    predictions = np.argmax(model_results, axis=1)
    assert len(predictions) == truth.shape[0]
    for actual_class in range(num_outputs):
        idx_examples_this_class = truth[:, actual_class] == 1
        prediction_for_this_class = predictions[idx_examples_this_class]
        for predicted_class in range(num_outputs):
            count = np.sum(prediction_for_this_class == predicted_class)
            confusion_matrix[actual_class, predicted_class] = count
    assert np.sum(confusion_matrix) == len(truth)
    assert np.sum(confusion_matrix) == np.sum(truth)
    return confusion_matrix
def rnn_repeat(X, Y, Xtest, Ytest, params_to_use, save_file="rnn_twolayer_acc_loss", num_reps=10):
    def rnn_model(hid_dim=10, ker_reg=0.01, rec_reg=0.01, optimizer="sgd"):
        model = Sequential()
        model.add(SimpleRNN(units=hid_dim, activation='relu',
                            input_shape=(X.shape[1], X.shape[2]),
                            kernel_regularizer=l2(ker_reg),
                            recurrent_regularizer=l2(rec_reg),
                            return_sequences=False))
        model.add(Dense(Y.shape[1], activation='softmax'))
        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        print('fitting a model')
        return model

    optim = params_to_use['optimizer']
    ker_reg_best = params_to_use['ker_reg']
    hid_val = params_to_use['hid_dim']
    rec_reg_best = params_to_use['rec_reg']

    # Need this for all classification errors
    class_error = []

    # X has 6400 examples, 800 of each class (8 classes), ordered by class.
    # Take 20% of each class and make this the validation set.
    to_take = np.random.choice(800, int(800*0.2), replace=False)
    class_split = np.array_split(X, 8)  # 8 equal slices of 800 examples each
    val_list = [x[to_take] for x in class_split]
    big_list = [item for sublist in val_list for item in sublist]
    val_X = np.asarray(big_list)
    label_set = np.arange(0, 8)  # class labels 0 to 7
    val_Y = np.repeat(label_set, int(800*0.2))
    val_Y = to_categorical(val_Y)
    setdiffval = set(range(800)) - set(to_take)
    setdiffval = list(setdiffval)
    X_train_vals = [x[setdiffval] for x in class_split]
    X_train = [item for sublist in X_train_vals for item in sublist]
    X_train = np.asarray(X_train)
    Y_train = np.repeat(label_set, int(800*0.8))
    Y_train = to_categorical(Y_train)

    for j in range(num_reps):
        model = rnn_model(hid_dim=hid_val, ker_reg=ker_reg_best, rec_reg=rec_reg_best, optimizer=optim)
        hist = model.fit(X_train, Y_train, validation_data=(val_X, val_Y), epochs=200)
        h1 = hist.history
        acc_ = np.asarray(h1['acc'])
        loss_ = np.asarray(h1['loss'])
        val_loss_ = np.asarray(h1['val_loss'])
        val_acc_ = np.asarray(h1['val_acc'])

        # Save the accuracy and loss
        acc_and_loss = np.column_stack((acc_, loss_, val_acc_, val_loss_))
        save_file_rnn = save_file + '_' + str(j) + '.txt'
        np.savetxt(save_file_rnn, acc_and_loss, delimiter=" ")
        print('saved file', save_file_rnn)

        # Run the final, trained model on the test set and return a confusion matrix
        test_scores = model.evaluate(Xtest, Ytest)  # evaluate returns the metric 'accuracy' on the test set
        print('eval_scores', test_scores[1]*100)
        predict = model.predict(Xtest)  # predict returns the class probabilities (softmax outputs)
        con_mat = get_confusion_matrix_one_hot(predict, Ytest)
        print(con_mat)

        # Save the confusion matrix
        save_con_mat = 'confusionmatrix_rnn' + '_' + str(j) + '.txt'
        np.savetxt(save_con_mat, con_mat, fmt='%i', delimiter=",")

        # Get the classification error per class, and the average classification error
        class_error_j = []
        for i in range(0, con_mat.shape[1]):  # for the number of classes
            class_i_correct = float(con_mat[i][i]) / float(sum(con_mat[i]))
            class_error_j.append(1. - class_i_correct)
        class_error_j.append(sum(class_error_j) / con_mat.shape[1])  # the average classification error
        class_error.append(class_error_j)  # each class's error plus the average error for this run

    np.savetxt('class_error.txt', class_error, fmt='%1.3f', delimiter=' ')
    return 0
if __name__ == '__main__':
    X, Y, Xtest, Ytest = load_data('train.csv', 'test.csv')
    best_params = {'rec_reg': 0.01, 'optimizer': 'adam', 'ker_reg': 0.1, 'hid_dim': 15}
    rnn_repeat(X, Y, Xtest, Ytest, best_params)