首先来看一下原始论文的描述:
FCN 中metric的深入理解与对应代码
一开始我对n_ji和n_ij的理解挺绕的。后来理清楚了之后发现很简单:
inii\sum_{i}n_{ii} 即class i 被正确分成了class i。 True Positive
jnij=ti\sum_{j}n_{ij}=t_i 即class i 被正确分成了class j,然后对class j求和。这就是ground-truth里class=i的个数
jnji\sum_{j}n_{ji} 这就是preds里class=i的个数

(但是需要注意,本篇关于multi-class的所有观点都是基于one v.s. all的思路来考虑的,也就是有one-hot的意思在里面,问学长的时候他说,一般而言对于multi-class的问题,11和00都是正确,而01,10也都是错误)

用代码来表示(对于multi-class来说)的话,

#若gt和preds没有进行one-hot编码,那么是这样:
n_ij = t_i = np.sum(GT==i)
n_ji = np.sum(Preds == i )

#若gt和preds进行了one-hot编码,那么是这样:
curr_gt_mask = gt_masks[i,:,:]
curr_pred_mask = pred_masks[i,:,:]
t_i = np.sum(curr_gt_mask)
n_ji = np.sum(curr_pred_mask)

自然而言的可以想到confusion_matrix,记得在半个月前,我也思考过multi-class的混淆矩阵,但是当时理解的还很不到位。
基本的混淆矩阵如下图所示:
FCN 中metric的深入理解与对应代码
FCN 中metric的深入理解与对应代码

jnjinii=FalsePositive\sum_{j}n_{ji}-n_{ii}=False Positive

jnijnii=FalseNegative\sum_{j}n_{ij}-n_{ii}=FalseNegative

那么,在iou中,所谓的union,也就是ti+jnjiniit_i+\sum_{j}n_{ji}-n_{ii}==(FP-TP)+(FN-TP)+TP

import numpy as np
from sklearn.metrics import confusion_matrix

y_true = [1, -1,  0,  0,  1, -1,  1,  0, -1,  0,  1, -1,  1,  0,  0, -1,  0]
y_prediction = [-1, -1,  1,  0,  0,  0,  0, -1,  1, -1,  1,  1,  0,  0,  1,  1, -1]
cnf_matrix = confusion_matrix(y_true, y_prediction)
print(cnf_matrix)
#[[1 1 3]
# [3 2 2]
# [1 3 1]]

FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)  # 即一列一列的来看
FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix) # 即一行一行的来看
TP = np.diag(cnf_matrix)
TN = cnf_matrix.sum() - (FP + FN + TP) #见下面解释

对于TN而言,从one vs all的角度比较好理解(以上述混淆矩阵的class0作为举例)
FCN 中metric的深入理解与对应代码
代码:github:Image Segmentation Evaluation

import numpy as np

def pixel_accuracy(eval_segm, gt_segm):
    '''
    sum_i(n_ii) / sum_i(t_i)
    '''

    check_size(eval_segm, gt_segm)

    cl, n_cl = extract_classes(gt_segm)
    eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl)

    sum_n_ii = 0
    sum_t_i  = 0

    for i, c in enumerate(cl):
        curr_eval_mask = eval_mask[i, :, :]
        curr_gt_mask = gt_mask[i, :, :]

        sum_n_ii += np.sum(np.logical_and(curr_eval_mask, curr_gt_mask))
        sum_t_i  += np.sum(curr_gt_mask)
 
    if (sum_t_i == 0):
        pixel_accuracy_ = 0
    else:
        pixel_accuracy_ = sum_n_ii / sum_t_i

    return pixel_accuracy_

def mean_accuracy(eval_segm, gt_segm):
    '''
    (1/n_cl) sum_i(n_ii/t_i)
    '''

    check_size(eval_segm, gt_segm)

    cl, n_cl = extract_classes(gt_segm)
    eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl)

    accuracy = list([0]) * n_cl

    for i, c in enumerate(cl):
        curr_eval_mask = eval_mask[i, :, :]
        curr_gt_mask = gt_mask[i, :, :]

        n_ii = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask))
        t_i  = np.sum(curr_gt_mask)
 
        if (t_i != 0):
            accuracy[i] = n_ii / t_i

    mean_accuracy_ = np.mean(accuracy)
    return mean_accuracy_

def mean_IU(eval_segm, gt_segm):
    '''
    (1/n_cl) * sum_i(n_ii / (t_i + sum_j(n_ji) - n_ii))
    '''

    check_size(eval_segm, gt_segm)

    cl, n_cl   = union_classes(eval_segm, gt_segm)
    _, n_cl_gt = extract_classes(gt_segm)
    eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl)

    IU = list([0]) * n_cl

    for i, c in enumerate(cl):
        curr_eval_mask = eval_mask[i, :, :]
        curr_gt_mask = gt_mask[i, :, :]
 
        if (np.sum(curr_eval_mask) == 0) or (np.sum(curr_gt_mask) == 0):
            continue

        n_ii = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask))
        t_i  = np.sum(curr_gt_mask)
        n_ij = np.sum(curr_eval_mask)

        IU[i] = n_ii / (t_i + n_ij - n_ii)
 
    mean_IU_ = np.sum(IU) / n_cl_gt
    return mean_IU_

def frequency_weighted_IU(eval_segm, gt_segm):
    '''
    sum_k(t_k)^(-1) * sum_i((t_i*n_ii)/(t_i + sum_j(n_ji) - n_ii))
    '''

    check_size(eval_segm, gt_segm)

    cl, n_cl = union_classes(eval_segm, gt_segm)
    eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl)

    frequency_weighted_IU_ = list([0]) * n_cl

    for i, c in enumerate(cl):
        curr_eval_mask = eval_mask[i, :, :]
        curr_gt_mask = gt_mask[i, :, :]
 
        if (np.sum(curr_eval_mask) == 0) or (np.sum(curr_gt_mask) == 0):
            continue

        n_ii = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask))
        t_i  = np.sum(curr_gt_mask)
        n_ij = np.sum(curr_eval_mask)

        frequency_weighted_IU_[i] = (t_i * n_ii) / (t_i + n_ij - n_ii)
 
    sum_k_t_k = get_pixel_area(eval_segm)
    
    frequency_weighted_IU_ = np.sum(frequency_weighted_IU_) / sum_k_t_k
    return frequency_weighted_IU_

'''
Auxiliary functions used during evaluation.
'''
def get_pixel_area(segm):
    return segm.shape[0] * segm.shape[1]

def extract_both_masks(eval_segm, gt_segm, cl, n_cl):
    eval_mask = extract_masks(eval_segm, cl, n_cl)
    gt_mask   = extract_masks(gt_segm, cl, n_cl)

    return eval_mask, gt_mask

def extract_classes(segm):
    cl = np.unique(segm)
    n_cl = len(cl)

    return cl, n_cl

def union_classes(eval_segm, gt_segm):
    eval_cl, _ = extract_classes(eval_segm)
    gt_cl, _   = extract_classes(gt_segm)

    cl = np.union1d(eval_cl, gt_cl)
    n_cl = len(cl)

    return cl, n_cl

def extract_masks(segm, cl, n_cl):
    h, w  = segm_size(segm)
    masks = np.zeros((n_cl, h, w))

    for i, c in enumerate(cl):
        masks[i, :, :] = segm == c

    return masks

def segm_size(segm):
    try:
        height = segm.shape[0]
        width  = segm.shape[1]
    except IndexError:
        raise

    return height, width

def check_size(eval_segm, gt_segm):
    h_e, w_e = segm_size(eval_segm)
    h_g, w_g = segm_size(gt_segm)

    if (h_e != h_g) or (w_e != w_g):
        raise EvalSegErr("DiffDim: Different dimensions of matrices!")

'''
Exceptions
'''
class EvalSegErr(Exception):
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)

相关文章:

  • 2021-11-22
  • 2021-08-08
  • 2022-01-11
  • 2022-12-23
  • 2021-06-29
  • 2021-09-07
  • 2022-12-23
猜你喜欢
  • 2021-06-20
  • 2021-07-26
  • 2021-10-09
  • 2021-04-07
  • 2021-08-13
  • 2021-04-17
相关资源
相似解决方案