Evaluation metrics for Multi-label Classification and their implementations

X,y = make_multilabel_classification(sparse = False, n_samples = 100, n_labels = 4, allow_unlabeled = False)
  1. Problem Transformation
  2. Adapted Algorithm
  3. Ensemble Approaches
  1. Problem Transformation: This can be carried out in 3 ways
from sklearn.datasets import make_multilabel_classification
from skmultilearn.problem_transform import BinaryRelevance, ClassifierChain, LabelPowerset
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from scipy import sparse
X,y = make_multilabel_classification(sparse = False, n_samples = 100, n_labels = 4, allow_unlabeled = False)X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2)tree_model = DecisionTreeClassifier()meta_model = BinaryRelevance(tree_model)meta_model.fit(X_train,y_train)predictions = meta_model.predict(X_test)
print(metrics.accuracy_score(y_test, predictions))
from sklearn.datasets import make_multilabel_classification
from skmultilearn.problem_transform import BinaryRelevance, ClassifierChain, LabelPowerset
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from scipy import sparse
X,y = make_multilabel_classification(sparse = False, n_samples = 100, n_labels = 4, allow_unlabeled = False)X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2)tree_model = DecisionTreeClassifier()meta_model = ClassifierChain(tree_model)meta_model.fit(X_train,y_train)predictions = meta_model.predict(X_test)
print(metrics.accuracy_score(y_test, predictions))
  1. http://scikit.ml/api/skmultilearn.html
  2. https://github.com/scikit-multilearn/scikit-multilearn/blob/master/skmultilearn/adapt/mlknn.py
  1. Precision @ K.
  2. Average Precision @ K.
  3. Mean Average Precision @ K.
  4. Sampled F1-Score.
ydef patk(actual, pred, k):
if k == 0:
return 0
k_pred = pred[:k]
actual_set = set(actual) pred_set = set(k_pred) common_values = actual_set.intersection(pred_set) return len(common_values)/len(pred[:k])y_true = [1 ,2, 0]
y_pred = [1, 1, 0]
if __name__ == "__main__":
print(patk(y_true, y_pred,3))
import numpy as npdef patk(actual, pred, k):
if k == 0:
return 0
k_pred = pred[:k]actual_set = set(actual)pred_set = set(k_pred)common_values = actual_set.intersection(pred_set)return len(common_values)/len(pred[:k])

def apatk(acutal, pred, k):
precision_ = []
for i in range(1, k+1):
precision_.append(patk(acutal, pred, i))if len(precision_) == 0:
return 0
return np.mean(precision_)y_true = [[1,2,0,1], [0,4], [3], [1,2]]
y_pred = [[1,1,0,1], [1,4], [2], [1,3]]
if __name__ == "__main__":
for i in range(len(y_true)):
for j in range(1, 4):
print("for K = "+str(j)+"average precision is "+str(apatk(y_true[i], y_pred[i], k=j)))
import numpy as npdef patk(actual, pred, k):
if k == 0:
return 0
k_pred = pred[:k]actual_set = set(actual)pred_set = set(k_pred)common_values = actual_set.intersection(pred_set)return len(common_values)/len(pred[:k])

def apatk(acutal, pred, k):
precision_ = []
for i in range(1, k+1):
precision_.append(patk(acutal, pred, i))if len(precision_) == 0:
return 0
return np.mean(precision_)y_true = [[1,2,0,1], [0,4], [3], [1,2]]
y_pred = [[1,1,0,1], [1,4], [2], [1,3]]
def mapk(acutal, pred, k):#creating a list for storing the Average Precision Values
average_precision = []
#interating through the whole data and calculating the apk for each
for i in range(len(acutal)):
average_precision.append(apatk(acutal[i], pred[i], k))
#returning the mean of all the data
return np.mean(average_precision)
if __name__ == "__main__":
print(mapk(y_true, y_pred,3))
from sklearn.metrics import f1_scorefrom sklearn.preprocessing import MultiLabelBinarizerdef f1_sampled(actual, pred):   mlb = MultiLabelBinarizer()   actual = mlb.fit_transform(actual)   pred = mlb.fit_transform(pred)   f1 = f1_score(actual, pred, average = "samples")   return f1y_true = [[1,2,0,1], [0,4], [3], [1,2]]y_pred = [[1,1,0,1], [1,4], [2], [1,3]]if __name__ == "__main__":   print(f1_sampled(y_true, y_pred))
  1. Among Precision @ K, Average Precision @ K, and Mean Average Precision @ K, use Mean Average Precision @ K to measure performance over the whole dataset; apply the remaining metrics as your requirements dictate.
  2. This article completes the series on classification metrics; the next topic will depend on what readers want.

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Vishnu vardhan Varapalli

Vishnu vardhan Varapalli

31 Followers

Software Engineer, working on real-time problems to minimize the errors.