Implementing SVM from scratch?

Question

I am trying to implement the rbf kernel for SVM from scratch as practice for my coming interviews. I attempted to use cvxopt to solve the optimization problem. However, when I compute the accuracy and compare it to the actual SVM library on sklearn, there is an extremely large discrepancy. I have attempted to isolate the problem but I cannot seem to fix it. Any help would be greatly appreciated. Posted below is the code. If anyone could please let me know what I am doing wrong or help suggest an alternative approach I would greatly appreciate it.

import numpy as np
import cvxopt
def rbf_kernel(gamma, **kwargs):
    """Build a Gaussian (RBF) kernel function with a fixed ``gamma``.

    Args:
        gamma: Width coefficient multiplying the squared distance.
        **kwargs: Accepted and ignored, so that every kernel factory can be
            called with the same keyword set (e.g. ``power``, ``coef``).

    Returns:
        A function ``f(x1, x2)`` computing ``exp(-gamma * ||x1 - x2||**2)``.
    """
    def f(x1, x2):
        # Squared Euclidean distance between the two samples.
        distance = np.linalg.norm(x1 - x2) ** 2
        return np.exp(-gamma * distance)
    return f
class SupportVectorMachine(object):
    """Kernel soft-margin SVM for two classes labelled -1 and +1.

    Training solves the dual quadratic program with ``cvxopt.solvers.qp``.

    Args:
        C: Upper bound on each Lagrange multiplier (soft-margin penalty).
            A falsy value drops the upper bound (hard margin).
        kernel: Kernel *factory*. It is called with ``power``, ``gamma`` and
            ``coef`` keywords and must return a function ``k(x1, x2)``.
        power: Forwarded to the kernel factory.
        gamma: Forwarded to the kernel factory; defaults to
            ``1 / n_features`` at fit time when falsy.
        coef: Forwarded to the kernel factory.

    After ``fit`` the instance exposes ``lagr_multipliers``,
    ``support_vectors``, ``support_vector_labels`` and ``intercept``, and
    ``kernel`` holds the instantiated kernel function.
    """

    # Multipliers at or below this value are treated as zero. The QP solver
    # is iterative, so "zero" multipliers come back as tiny positive noise.
    _SUPPORT_TOL = 1e-7

    def __init__(self, C=1, kernel=rbf_kernel, power=4, gamma=None, coef=4):
        self.C = C
        self.kernel = kernel
        # Keep the factory separately: fit() replaces self.kernel with the
        # instantiated kernel function, and a second fit() needs the factory.
        self._kernel_factory = kernel
        self.power = power
        self.gamma = gamma
        self.coef = coef
        self.lagr_multipliers = None
        self.support_vectors = None
        self.support_vector_labels = None
        self.intercept = None

    def fit(self, X, y):
        """Train on samples ``X`` (2-D array) with labels ``y`` in {-1, +1}.

        Raises:
            ValueError: If the solver returns no multiplier above the
                support-vector threshold.
        """
        n_samples, n_features = np.shape(X)

        # Set gamma to 1/n_features by default
        if not self.gamma:
            self.gamma = 1 / n_features

        # Initialize kernel method with parameters
        self.kernel = self._kernel_factory(
            power=self.power,
            gamma=self.gamma,
            coef=self.coef)

        # Calculate kernel matrix
        kernel_matrix = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                kernel_matrix[i, j] = self.kernel(X[i], X[j])

        # Define the quadratic optimization problem:
        #   minimize 1/2 a'Pa + q'a   subject to   G a <= h,   A a = b
        P = cvxopt.matrix(np.outer(y, y) * kernel_matrix, tc='d')
        q = cvxopt.matrix(np.ones(n_samples) * -1)
        A = cvxopt.matrix(y, (1, n_samples), tc='d')
        b = cvxopt.matrix(0, tc='d')

        if not self.C:
            # Hard margin: only the constraint a >= 0 (written as -a <= 0).
            G = cvxopt.matrix(np.identity(n_samples) * -1)
            h = cvxopt.matrix(np.zeros(n_samples))
        else:
            # Soft margin: 0 <= a <= C, stacked as -a <= 0 and a <= C.
            G_max = np.identity(n_samples) * -1
            G_min = np.identity(n_samples)
            G = cvxopt.matrix(np.vstack((G_max, G_min)))
            h_max = cvxopt.matrix(np.zeros(n_samples))
            h_min = cvxopt.matrix(np.ones(n_samples) * self.C)
            h = cvxopt.matrix(np.vstack((h_max, h_min)))

        # Solve the quadratic optimization problem using cvxopt
        minimization = cvxopt.solvers.qp(P, q, G, h, A, b)

        # Lagrange multipliers
        lagr_mult = np.ravel(minimization['x'])

        # Support vectors are the samples with a non-zero multiplier.
        idx = lagr_mult > self._SUPPORT_TOL
        if not np.any(idx):
            raise ValueError("the QP solution contains no support vectors")
        self.lagr_multipliers = lagr_mult[idx]
        self.support_vectors = X[idx]
        self.support_vector_labels = y[idx]

        # Intercept. Only support vectors strictly inside the box
        # (0 < a < C) lie exactly on the margin and satisfy y_k * f(x_k) = 1,
        # so the intercept is averaged over those instead of being read off
        # whichever support vector happens to come first.
        dual_coef = self.lagr_multipliers * self.support_vector_labels
        sv_gram = kernel_matrix[np.ix_(idx, idx)]
        # raw_scores[k] = sum_i a_i * y_i * K(sv_i, sv_k)
        raw_scores = np.dot(dual_coef, sv_gram)
        if self.C:
            on_margin = self.lagr_multipliers < self.C - self._SUPPORT_TOL
            if not np.any(on_margin):
                # Every support vector sits at the bound; use them all.
                on_margin = np.ones_like(on_margin)
        else:
            on_margin = np.ones(len(dual_coef), dtype=bool)
        self.intercept = float(np.mean(
            self.support_vector_labels[on_margin] - raw_scores[on_margin]))

    def predict(self, X):
        """Return the sign of the decision function for every sample in ``X``."""
        y_pred = []
        # Iterate through list of samples and make predictions
        for sample in X:
            prediction = 0
            # Determine the label of the sample by the support vectors
            for multiplier, label, vector in zip(self.lagr_multipliers,
                                                 self.support_vector_labels,
                                                 self.support_vectors):
                prediction += multiplier * label * self.kernel(vector, sample)
            prediction += self.intercept
            y_pred.append(np.sign(prediction))
        return np.array(y_pred)




def main():
    """Compare the from-scratch SVM with sklearn's SVC on two iris classes.

    NOTE(review): load_iris, normalize, train_test_split, accuracy_score and
    SVC are not imported in this snippet; they are presumably the
    scikit-learn helpers of the same names - confirm and import them.
    """
    print("-- SVM Classifier --")
    data = load_iris()

    # The classifier is binary, so only two of the iris classes are kept.
    # Passing every class (X = normalize(data.data); y = data.target) was
    # the earlier mistake.
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    # Relabel the two remaining classes to the -1 / +1 encoding.
    y[y == 1] = -1
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    clf = SupportVectorMachine(kernel=rbf_kernel, gamma=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy (scratch):", accuracy)

    clf_sklearn = SVC(gamma='auto')
    clf_sklearn.fit(X_train, y_train)
    y_pred2 = clf_sklearn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred2)
    print("Accuracy :", accuracy)


# Run the demo only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

RESULTS: Accuracy (from scratch): 0.31666666666666665 Accuracy (using SVM Library) : 1.0

Note, I did not add the libraries to save space

Could you please post the hyperparameters after you fitted the model, both from sklearn and your own model? — Pedro Henrique Monforte, Mar 25 '19 at 18:51

score 1 · Answer 1 · answered Mar 25 '19 at 18:42

This is actually correct code. Nothing is wrong with it per se.

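However, note that this implementation is a binary classifier: it separates exactly two classes, labelled -1 and +1 (it is the building block that a one-versus-one (OVO) scheme would apply to each pair of classes). On its own it is not meant for more than two classes, which is why you get a lower accuracy when all three iris classes are passed to it.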

Implementing SVM from scratch?

1 Answers1