Please cite us if you use the software
Check whether the notebook is running on Google Colab; if it is, install pycm.
import sys
try:
import google.colab
!{sys.executable} -m pip -q -q install pycm
except:
pass
from pycm import ConfusionMatrix
def activation1(i, cutoff=0.5):
    """Convert a single score into a binary class label.

    Used in this notebook as the ``threshold`` callable handed to
    ``ConfusionMatrix`` (presumably invoked once per prediction).

    Args:
        i: Numeric score for one sample.
        cutoff: Decision boundary. Defaults to 0.5, the value that was
            previously hard-coded, so one-argument calls behave as before.

    Returns:
        1 if ``i`` is strictly greater than ``cutoff``, otherwise 0.
    """
    # Strict comparison: a score exactly equal to the cutoff maps to 0.
    return 1 if i > cutoff else 0
# Binary example: ground-truth labels are 0/1, predictions are raw scores.
y_actual = [1, 1, 0, 1, 1, 0]
# Scores are turned into labels by activation1 (1 when score > 0.5, else 0).
# The last score (0.9) belongs to an actual 0, so it becomes a false positive.
y_pred = [0.65, 0.7, 0.4, 0.54, 0.82, 0.9]
# `threshold` receives the callable that maps each prediction to a class.
cm = ConfusionMatrix(y_actual, y_pred, threshold=activation1)
# Class labels of the resulting matrix (the recorded output shows [0, 1]).
cm.classes
[0, 1]
# Print the confusion matrix followed by its overall and per-class statistics.
print(cm)
Predict 0 1 Actual 0 1 1 1 0 4 Overall Statistics : 95% CI (0.53513,1.13154) ACC Macro 0.83333 ARI 0.34783 AUNP 0.75 AUNU 0.75 Bangdiwala B 0.77273 Bennett S 0.66667 CBA 0.65 CSI 0.65 Chi-Squared 2.4 Chi-Squared DF 1 Conditional Entropy 0.33333 Cramer V 0.63246 Cross Entropy 1.03701 F1 Macro 0.77778 F1 Micro 0.83333 FNR Macro 0.25 FNR Micro 0.16667 FPR Macro 0.25 FPR Micro 0.16667 Gwet AC1 0.73333 Hamming Loss 0.16667 Joint Entropy 1.25163 KL Divergence 0.11871 Kappa 0.57143 Kappa 95% CI (-0.19538,1.33824) Kappa No Prevalence 0.66667 Kappa Standard Error 0.39123 Kappa Unbiased 0.55556 Krippendorff Alpha 0.59259 Lambda A 0.5 Lambda B 0.0 Mutual Information 0.31669 NIR 0.66667 NPV Macro 0.9 NPV Micro 0.83333 Overall ACC 0.83333 Overall CEN 0.39624 Overall J (1.3,0.65) Overall MCC 0.63246 Overall MCEN 0.27683 Overall RACC 0.61111 Overall RACCU 0.625 P-Value 0.35117 PPV Macro 0.9 PPV Micro 0.83333 Pearson C 0.53452 Phi-Squared 0.4 RCI 0.34487 RR 3.0 Reference Entropy 0.9183 Response Entropy 0.65002 SOA1(Landis & Koch) Moderate SOA2(Fleiss) Intermediate to Good SOA3(Altman) Moderate SOA4(Cicchetti) Fair SOA5(Cramer) Strong SOA6(Matthews) Moderate SOA7(Lambda A) Moderate SOA8(Lambda B) None SOA9(Krippendorff Alpha) Low SOA10(Pearson C) Strong Scott PI 0.55556 Standard Error 0.15215 TNR Macro 0.75 TNR Micro 0.83333 TPR Macro 0.75 TPR Micro 0.83333 Zero-one Loss 1 Class Statistics : Classes 0 1 ACC(Accuracy) 0.83333 0.83333 AGF(Adjusted F-score) 0.68041 0.89087 AGM(Adjusted geometric mean) 0.82426 0.65533 AM(Difference between automatic and manual classification) -1 1 AUC(Area under the ROC curve) 0.75 0.75 AUCI(AUC value interpretation) Good Good AUPR(Area under the PR curve) 0.75 0.9 BB(Braun-Blanquet similarity) 0.5 0.8 BCD(Bray-Curtis dissimilarity) 0.08333 0.08333 BM(Informedness or bookmaker informedness) 0.5 0.5 CEN(Confusion entropy) 0.52832 0.35221 DOR(Diagnostic odds ratio) None None DP(Discriminant power) None None DPI(Discriminant power interpretation) None 
None ERR(Error rate) 0.16667 0.16667 F0.5(F0.5 score) 0.83333 0.83333 F1(F1 score - harmonic mean of precision and sensitivity) 0.66667 0.88889 F2(F2 score) 0.55556 0.95238 FDR(False discovery rate) 0.0 0.2 FN(False negative/miss/type 2 error) 1 0 FNR(Miss rate or false negative rate) 0.5 0.0 FOR(False omission rate) 0.2 0.0 FP(False positive/type 1 error/false alarm) 0 1 FPR(Fall-out or false positive rate) 0.0 0.5 G(G-measure geometric mean of precision and sensitivity) 0.70711 0.89443 GI(Gini index) 0.5 0.5 GM(G-mean geometric mean of specificity and sensitivity) 0.70711 0.70711 HD(Hamming distance) 1 1 IBA(Index of balanced accuracy) 0.25 0.75 ICSI(Individual classification success index) 0.5 0.8 IS(Information score) 1.58496 0.26303 J(Jaccard index) 0.5 0.8 LS(Lift score) 3.0 1.2 MCC(Matthews correlation coefficient) 0.63246 0.63246 MCCI(Matthews correlation coefficient interpretation) Moderate Moderate MCEN(Modified confusion entropy) 0.5 0.46439 MK(Markedness) 0.8 0.8 N(Condition negative) 4 2 NLR(Negative likelihood ratio) 0.5 0.0 NLRI(Negative likelihood ratio interpretation) Negligible Good NPV(Negative predictive value) 0.8 1.0 OC(Overlap coefficient) 1.0 1.0 OOC(Otsuka-Ochiai coefficient) 0.70711 0.89443 OP(Optimized precision) 0.5 0.5 P(Condition positive or support) 2 4 PLR(Positive likelihood ratio) None 2.0 PLRI(Positive likelihood ratio interpretation) None Poor POP(Population) 6 6 PPV(Precision or positive predictive value) 1.0 0.8 PRE(Prevalence) 0.33333 0.66667 Q(Yule Q - coefficient of colligation) None None QI(Yule Q interpretation) None None RACC(Random accuracy) 0.05556 0.55556 RACCU(Random accuracy unbiased) 0.0625 0.5625 TN(True negative/correct rejection) 4 1 TNR(Specificity or true negative rate) 1.0 0.5 TON(Test outcome negative) 5 1 TOP(Test outcome positive) 1 5 TP(True positive/hit) 1 4 TPR(Sensitivity, recall, hit rate, or true positive rate) 0.5 1.0 Y(Youden index) 0.5 0.5 dInd(Distance index) 0.5 0.5 sInd(Similarity index) 0.64645 
0.64645
def activation2(i, ref=("cat", "dog", "ship")):
    """Map a vector of per-class scores to the label with the highest score.

    Used in this notebook as the ``threshold`` callable handed to
    ``ConfusionMatrix`` for the multi-class example.

    Args:
        i: Indexable sequence of scores (list, tuple, 1-D array, ...), one
            entry per class, in the same order as ``ref``.
        ref: Class labels by position. Defaults to the labels that were
            previously hard-coded, so one-argument calls behave as before.

    Returns:
        The entry of ``ref`` at the position of the largest score. On ties
        the first (lowest-index) maximum wins.

    Raises:
        ValueError: If ``i`` is empty.
        IndexError: If the winning position has no corresponding label.
    """
    # Argmax via indices: one pass, and it does not rely on a list-only
    # ``.index`` method. ``max`` returns the first maximal element, which
    # keeps the original first-occurrence tie-breaking.
    best = max(range(len(i)), key=i.__getitem__)
    return ref[best]
# Multi-class example: ground-truth labels are strings.
y_actual = ["ship", "cat", "dog", "cat", "dog", "ship"]
# One score vector per sample; activation2 picks the label at the position of
# the largest value, using the order cat, dog, ship.
y_pred = [
[0.08, 0.39, 0.53],
[0.77, 0.22, 0.01],
[0.5, 0.45, 0.05],
[0.7, 0.17, 0.13],
[0.28, 0.71, 0.01],
[0.12, 0.03, 0.85]]
# `threshold` receives the callable that maps each score vector to a class.
cm2 = ConfusionMatrix(y_actual, y_pred, threshold=activation2)
# Class labels of the resulting matrix (the recorded output shows
# ['cat', 'dog', 'ship']).
cm2.classes
['cat', 'dog', 'ship']
# Print the confusion matrix followed by its overall and per-class statistics.
print(cm2)
Predict cat dog ship Actual cat 2 0 0 dog 1 1 0 ship 0 0 2 Overall Statistics : 95% CI (0.53513,1.13154) ACC Macro 0.88889 ARI 0.44444 AUNP 0.875 AUNU 0.875 Bangdiwala B 0.75 Bennett S 0.75 CBA 0.72222 CSI 0.72222 Chi-Squared 8.0 Chi-Squared DF 4 Conditional Entropy 0.33333 Cramer V 0.8165 Cross Entropy 1.72331 F1 Macro 0.82222 F1 Micro 0.83333 FNR Macro 0.16667 FNR Micro 0.16667 FPR Macro 0.08333 FPR Micro 0.08333 Gwet AC1 0.75258 Hamming Loss 0.16667 Joint Entropy 1.9183 KL Divergence 0.13835 Kappa 0.75 Kappa 95% CI (0.30269,1.19731) Kappa No Prevalence 0.66667 Kappa Standard Error 0.22822 Kappa Unbiased 0.74468 Krippendorff Alpha 0.76596 Lambda A 0.75 Lambda B 0.66667 Mutual Information 1.12581 NIR 0.33333 NPV Macro 0.93333 NPV Micro 0.91667 Overall ACC 0.83333 Overall CEN 0.16279 Overall J (2.16667,0.72222) Overall MCC 0.78335 Overall MCEN 0.18464 Overall RACC 0.33333 Overall RACCU 0.34722 P-Value 0.01783 PPV Macro 0.88889 PPV Micro 0.83333 Pearson C 0.75593 Phi-Squared 1.33333 RCI 0.71031 RR 2.0 Reference Entropy 1.58496 Response Entropy 1.45915 SOA1(Landis & Koch) Substantial SOA2(Fleiss) Intermediate to Good SOA3(Altman) Good SOA4(Cicchetti) Excellent SOA5(Cramer) Very Strong SOA6(Matthews) Strong SOA7(Lambda A) Strong SOA8(Lambda B) Strong SOA9(Krippendorff Alpha) Tentative SOA10(Pearson C) Strong Scott PI 0.74468 Standard Error 0.15215 TNR Macro 0.91667 TNR Micro 0.91667 TPR Macro 0.83333 TPR Micro 0.83333 Zero-one Loss 1 Class Statistics : Classes cat dog ship ACC(Accuracy) 0.83333 0.83333 1.0 AGF(Adjusted F-score) 0.92319 0.68041 1.0 AGM(Adjusted geometric mean) 0.81962 0.82426 1.0 AM(Difference between automatic and manual classification) 1 -1 0 AUC(Area under the ROC curve) 0.875 0.75 1.0 AUCI(AUC value interpretation) Very Good Good Excellent AUPR(Area under the PR curve) 0.83333 0.75 1.0 BB(Braun-Blanquet similarity) 0.66667 0.5 1.0 BCD(Bray-Curtis dissimilarity) 0.08333 0.08333 0.0 BM(Informedness or bookmaker informedness) 0.75 0.5 1.0 
CEN(Confusion entropy) 0.23219 0.26416 0 DOR(Diagnostic odds ratio) None None None DP(Discriminant power) None None None DPI(Discriminant power interpretation) None None None ERR(Error rate) 0.16667 0.16667 0.0 F0.5(F0.5 score) 0.71429 0.83333 1.0 F1(F1 score - harmonic mean of precision and sensitivity) 0.8 0.66667 1.0 F2(F2 score) 0.90909 0.55556 1.0 FDR(False discovery rate) 0.33333 0.0 0.0 FN(False negative/miss/type 2 error) 0 1 0 FNR(Miss rate or false negative rate) 0.0 0.5 0.0 FOR(False omission rate) 0.0 0.2 0.0 FP(False positive/type 1 error/false alarm) 1 0 0 FPR(Fall-out or false positive rate) 0.25 0.0 0.0 G(G-measure geometric mean of precision and sensitivity) 0.8165 0.70711 1.0 GI(Gini index) 0.75 0.5 1.0 GM(G-mean geometric mean of specificity and sensitivity) 0.86603 0.70711 1.0 HD(Hamming distance) 1 1 0 IBA(Index of balanced accuracy) 0.9375 0.25 1.0 ICSI(Individual classification success index) 0.66667 0.5 1.0 IS(Information score) 1.0 1.58496 1.58496 J(Jaccard index) 0.66667 0.5 1.0 LS(Lift score) 2.0 3.0 3.0 MCC(Matthews correlation coefficient) 0.70711 0.63246 1.0 MCCI(Matthews correlation coefficient interpretation) Strong Moderate Very Strong MCEN(Modified confusion entropy) 0.26416 0.25 0 MK(Markedness) 0.66667 0.8 1.0 N(Condition negative) 4 4 4 NLR(Negative likelihood ratio) 0.0 0.5 0.0 NLRI(Negative likelihood ratio interpretation) Good Negligible Good NPV(Negative predictive value) 1.0 0.8 1.0 OC(Overlap coefficient) 1.0 1.0 1.0 OOC(Otsuka-Ochiai coefficient) 0.8165 0.70711 1.0 OP(Optimized precision) 0.69048 0.5 1.0 P(Condition positive or support) 2 2 2 PLR(Positive likelihood ratio) 4.0 None None PLRI(Positive likelihood ratio interpretation) Poor None None POP(Population) 6 6 6 PPV(Precision or positive predictive value) 0.66667 1.0 1.0 PRE(Prevalence) 0.33333 0.33333 0.33333 Q(Yule Q - coefficient of colligation) None None None QI(Yule Q interpretation) None None None RACC(Random accuracy) 0.16667 0.05556 0.11111 RACCU(Random 
accuracy unbiased) 0.17361 0.0625 0.11111 TN(True negative/correct rejection) 3 4 4 TNR(Specificity or true negative rate) 0.75 1.0 1.0 TON(Test outcome negative) 3 5 4 TOP(Test outcome positive) 3 1 2 TP(True positive/hit) 2 1 2 TPR(Sensitivity, recall, hit rate, or true positive rate) 1.0 0.5 1.0 Y(Youden index) 0.75 0.5 1.0 dInd(Distance index) 0.25 0.5 0.0 sInd(Similarity index) 0.82322 0.64645 1.0