#!/usr/bin/env python
"""``analyze_agreement.py`` is a script that analyzes the agreement between two
annotations of the same file. The script measures:

* Object counts: are they the same?
* Object assignment: given the best-scoring alignment of objects
  onto each other, to what extent do they differ?

For an overview of command-line options, call::

  analyze_agreement.py -h
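
For instance (the file names here are illustrative)::

  analyze_agreement.py -t ground_truth.xml -p prediction.xml -v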

Alignment algorithm
-------------------

The script uses a greedy alignment procedure.

First, it computes, for each ``(truth, prediction)`` symbol pair,
the pixel-wise recall, precision, and f-score of their masks
(bounding box overlap alone may be misleading, especially for
parallel beams).
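
A rough numpy sketch of the per-pair metrics (names are illustrative;
``crop_truth`` and ``crop_pred`` stand for the two masks cropped to the
intersection of the bounding boxes; cf. ``pixel_metrics`` below)::

  n_truth = float(truth.mask.sum())
  n_pred = float(prediction.mask.sum())
  n_common = float((crop_truth * crop_pred).sum())
  recall = n_common / n_truth if n_truth else 0.0
  precision = n_common / n_pred if n_pred else 0.0
  fscore = (2 * recall * precision / (recall + precision)
            if recall and precision else 0.0)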

Each predicted symbol is then aligned to the ground truth symbol
with the highest f-score. If the symbol classes of a ``(truth, prediction)``
pair do not match, their score is set to 0. (This behavior can be
turned off with the ``--no_strict_clsnames`` option.)
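
With the pairwise f-scores collected in a matrix of shape
``(len(truth), len(prediction))``, this greedy step is essentially a
column-wise argmax (cf. ``align_cropobjects`` below)::

  closest_truths = fscore.argmax(axis=0)

Ties are broken in favor of a truth symbol whose class matches the
prediction's class.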

Next, the alignment is cleaned up: if multiple predictions are
aligned to a single ground truth, the one with the highest f-score
is chosen and the other predicted symbols are considered
unaligned.
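
In the implementation, this cleanup falls out of computing the alignment
in both directions and keeping only the symmetric pairs; roughly (the
class-name check is omitted here)::

  aln_tp = align_cropobjects(truth, prediction, fscore=f)
  aln_pt = align_cropobjects(prediction, truth, fscore=f.T)
  _aln_tp_set = frozenset(aln_tp)
  symmetric = [(t, p) for p, t in aln_pt if (t, p) in _aln_tp_set]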

Computing the output f-score
----------------------------

Finally, we average the recall and precision of the aligned
``(truth, prediction)`` symbol pairs and combine the averages
into the overall f-score.

Symbols that are left unaligned -- on either side -- contribute
zeros to these averages, dragging the overall f-score down.
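
A minimal sketch of this aggregation (omitting the strict class-name
check; ``r`` and ``p`` are the recall and precision matrices,
``n_not_aligned`` counts the unaligned symbols on both sides; the
f-score is defined as 0 when either average is zero)::

  denom = len(alignment) + n_not_aligned
  total_r = sum(r[i, j] for i, j in alignment) / denom
  total_p = sum(p[i, j] for i, j in alignment) / denom
  total_f = 2 * total_r * total_p / (total_r + total_p)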

"""
from __future__ import print_function, unicode_literals
from __future__ import division
import argparse
import collections
import logging
import pprint
import time

import numpy

from muscima.io import parse_cropobject_list

__version__ = "0.0.1"
__author__ = "Jan Hajic jr."


##############################################################################


def bbox_intersection(origin, intersect):
    """Returns the coordinates of the origin bounding box that
    are intersected by the intersect bounding box.

    >>> bounding_box = 10, 100, 30, 110
    >>> other_bbox = 20, 100, 40, 105
    >>> bbox_intersection(bounding_box, other_bbox)
    (10, 0, 20, 5)
    >>> bbox_intersection(other_bbox, bounding_box)
    (0, 0, 10, 5)
    >>> containing_bbox = 4, 55, 44, 115
    >>> bbox_intersection(bounding_box, containing_bbox)
    (0, 0, 20, 10)
    >>> contained_bbox = 12, 102, 22, 108
    >>> bbox_intersection(bounding_box, contained_bbox)
    (2, 2, 12, 8)
    >>> non_overlapping_bbox = 0, 0, 3, 3
    >>> bbox_intersection(bounding_box, non_overlapping_bbox) is None
    True

    """
    o_t, o_l, o_b, o_r = origin
    t, l, b, r = intersect

    out_top = max(t, o_t)
    out_left = max(l, o_l)
    out_bottom = min(b, o_b)
    out_right = min(r, o_r)

    if (out_top < out_bottom) and (out_left < out_right):
        return out_top - o_t, \
               out_left - o_l, \
               out_bottom - o_t, \
               out_right - o_l
    else:
        return None


##############################################################################

def pixel_metrics(truth, prediction):
    """Computes the recall, precision and f-score for the prediction
    CropObject given the truth CropObject."""
    recall, precision, fscore = 0, 0, 0

    intersection_truth = bbox_intersection(truth.bounding_box,
                                           prediction.bounding_box)
    if intersection_truth is None:
        logging.debug('No intersection for CropObjects: t={0},'
                      ' p={1}'.format(truth.bounding_box,
                                      prediction.bounding_box))
        return recall, precision, fscore

    intersection_pred = bbox_intersection(prediction.bounding_box,
                                          truth.bounding_box)
    logging.debug('Found intersection for CropObjects: t={0},'
                  ' p={1}'.format(truth.bounding_box,
                                  prediction.bounding_box))

    tt, tl, tb, tr = intersection_truth
    pt, pl, pb, pr = intersection_pred
    crop_truth = truth.mask[tt:tb, tl:tr]
    crop_pred = prediction.mask[pt:pb, pl:pr]

    # Assumes the mask values are 1...
    n_truth = float(truth.mask.sum())
    n_pred = float(prediction.mask.sum())
    n_common = float((crop_truth * crop_pred).sum())

    # There are no zero-pixel objects, but the overlap may be nonzero
    if n_truth == 0:
        recall = 0.0
        precision = 0.0
    elif n_pred == 0:
        recall = 0.0
        precision = 0.0
    else:
        recall = n_common / n_truth
        precision = n_common / n_pred

    if (recall == 0) or (precision == 0):
        fscore = 0
    else:
        fscore = 2 * recall * precision / (recall + precision)

    return recall, precision, fscore


def cropobjects_rpf(truth, prediction):
    """Computes CropObject pixel-level metrics.

    :param truth: A list of the ground truth CropObjects.

    :param prediction: A list of the predicted CropObjects.

    :returns: Three matrices with shape ``(len(truth), len(prediction))``:
        recall, precision, and f-score for each truth/prediction
        CropObject pair. Truth cropobjects are rows, prediction
        cropobjects are columns.
    """
    recall = numpy.zeros((len(truth), len(prediction)))
    precision = numpy.zeros((len(truth), len(prediction)))
    fscore = numpy.zeros((len(truth), len(prediction)))

    for i, t in enumerate(truth):
        for j, pred_c in enumerate(prediction):
            r, p, f = pixel_metrics(t, pred_c)
            recall[i, j] = r
            precision[i, j] = p
            fscore[i, j] = f

    return recall, precision, fscore


def align_cropobjects(truth, prediction, fscore=None):
    """Aligns prediction CropObjects to truth.

    :param truth: A list of the ground truth CropObjects.

    :param prediction: A list of the predicted CropObjects.

    :returns: A list of (t, p) pairs of CropObject indices into
        the truth and prediction lists. There will be one pair
        for each predicted symbol.
    """
    if fscore is None:
        _, _, fscore = cropobjects_rpf(truth, prediction)

    # For each prediction (column), pick the highest-scoring
    # truth symbol.
    closest_truths = list(fscore.argmax(axis=0))

    # Checking for duplicate "best ground truth" alignments.
    # This does *not* check for duplicates in the sense
    # "multiple predictions aligned to the same truth"
    # or "multiple truths aligned to the same prediction",
    # it only acts as a tie-breaker in case one prediction overlaps
    # to the same degree multiple truth objects (e.g. a single sharp
    # in a key signature).
    closest_truth_distance = [fscore[ct, j]
                              for j, ct in enumerate(closest_truths)]
    equidistant_closest_truths = [[i for i, x in enumerate(fscore[:, j])
                                   if x == ct]
                                  for j, ct in enumerate(closest_truth_distance)]

    clsname_aware_closest_truths = []
    for j, ects in enumerate(equidistant_closest_truths):
        best_truth_i = int(ects[0])

        # If there is more than one tied best choice,
        # try to choose the truth cropobject that has the same
        # class as the predicted cropobject.
        if len(ects) > 1:
            ects_c = {truth[int(i)].clsname: i for i in ects}
            j_clsname = prediction[j].clsname
            if j_clsname in ects_c:
                best_truth_i = int(ects_c[j_clsname])

        clsname_aware_closest_truths.append(best_truth_i)

    alignment = [(t, p) for p, t in enumerate(clsname_aware_closest_truths)]
    return alignment


def rpf_given_alignment(alignment, r, p,
                        n_not_aligned=0,
                        strict_clsnames=True,
                        truths=None, predictions=None):
    if strict_clsnames:
        if not truths:
            raise ValueError('If strict_clsnames is requested, must supply truths'
                             ' CropObjects!')
        if not predictions:
            raise ValueError('If strict_clsnames is requested, must supply predictions'
                             ' CropObjects!')

    total_r, total_p = 0, 0

    for i, j in alignment:
        # Check for strict clsnames only at this stage.
        # The purpose is: if two people mark the same object
        # differently, we do want to know "it should be aligned
        # to each other, but the classes don't fit" -- we don't
        # want to maybe align it to an overlapping object of
        # the corresponding wrong class.
        if strict_clsnames:
            t_c = truths[i]
            p_c = predictions[j]
            if t_c.clsname != p_c.clsname:
                continue

        total_r += r[i, j]
        total_p += p[i, j]

    # Unaligned objects drag the averages down:
    # - A prediction with no GT is in the alignment, so it counts towards
    #   the len(alignment) denominator.
    # - A GT with no prediction aligned to it contributes zero and is
    #   counted through n_not_aligned.
    total_r /= len(alignment) + n_not_aligned
    total_p /= len(alignment) + n_not_aligned

    if (total_r == 0) or (total_p == 0):
        total_f = 0.0
    else:
        total_f = 2 * total_r * total_p / (total_r + total_p)

    return total_r, total_p, total_f


##############################################################################

def build_argument_parser():
    parser = argparse.ArgumentParser(description=__doc__, add_help=True,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-t', '--true', action='store', required=True,
                        help='The CropObjectList file you want to consider'
                             ' ground truth.')
    parser.add_argument('-p', '--prediction', action='store', required=True,
                        help='The CropObjectList file you want to consider'
                             ' the prediction.')
    parser.add_argument('-e', '--export', action='store',
                        help='If set, will export the problematic CropObjects'
                             ' to this file.')

    parser.add_argument('--analyze_alignment', action='store_true',
                        help='If set, will check whether the alignment is 1:1,'
                             ' and print out the irregularities.')
    parser.add_argument('--analyze_clsnames', action='store_true',
                        help='If set, will check whether the CropObjects aligned'
                             ' to each other have the same class labels'
                             ' and print out the irregularities.')
    parser.add_argument('--no_strict_clsnames', action='store_true',
                        help='If set, will not require aligned objects\' clsnames'
                             ' to match before computing pixel-wise overlap'
                             ' metrics.')
    parser.add_argument('--log_alignment', action='store_true',
                        help='Print how the true and predicted objects are'
                             ' paired.')

    parser.add_argument('--print_fscore_only', action='store_true',
                        help='If set, only print the total F-score number.'
                             ' Useful for using in an automated pipeline.')

    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Turn on INFO messages.')
    parser.add_argument('--debug', action='store_true',
                        help='Turn on DEBUG messages.')

    return parser


def main(args):
    logging.info('Starting main...')
    _start_time = time.clock()

    # The algorithm:
    # - build the cost function(s) for a pair of CropObjects
    # - align the objects, using the cost function

    # First alignment: try just matching a predicted object to the nearest
    # true object.
    # First distance function: proportion of shared pixels.
    # Rule: if two objects don't share a pixel, they cannot be considered related.
    # Object classes do not factor into this so far.

    truth = parse_cropobject_list(args.true)
    prediction = parse_cropobject_list(args.prediction)

    _parse_time = time.clock()
    logging.info('Parsing {0} true and {1} prediction cropobjects took {2:.2f} s'
                 ''.format(len(truth), len(prediction), _parse_time - _start_time))

    r, p, f = cropobjects_rpf(truth, prediction)

    _rpf_time = time.clock()
    logging.info('Computing {0} entries of r/p/f matrices took {1:.2f} s'
                 ''.format(len(truth) * len(prediction), _rpf_time - _parse_time))

    alignment_tp = align_cropobjects(truth, prediction, fscore=f)
    alignment_pt = align_cropobjects(prediction, truth, fscore=f.T)

    # Intersect alignments
    _aln_tp_set = frozenset(alignment_tp)
    alignment_tp_symmetric = [(t, p) for p, t in alignment_pt
                              if (t, p) in _aln_tp_set
                              and (truth[t].clsname == prediction[p].clsname)]
    truth_not_aligned = [t for p, t in alignment_pt
                         if (t, p) not in alignment_tp_symmetric]
    n_truth_not_aligned = len(truth_not_aligned)
    preds_not_aligned = [p for t, p in alignment_tp
                         if (t, p) not in alignment_tp_symmetric]
    n_preds_not_aligned = len(preds_not_aligned)
    n_not_aligned = n_truth_not_aligned + n_preds_not_aligned

    _aln_time = time.clock()
    logging.info('Computing alignment took {0:.2f} s'
                 ''.format(_aln_time - _rpf_time))

    # Now compute agreement: precision and recall on pixels
    # of the aligned CropObjects.

    # We apply strict clsnames only here, after the CropObjects have been
    # aligned to each other using pixel metrics.
    _strict_clsnames = (not args.no_strict_clsnames)
    total_r, total_p, total_f = rpf_given_alignment(alignment_tp_symmetric, r, p,
                                                    n_not_aligned=n_not_aligned,
                                                    strict_clsnames=_strict_clsnames,
                                                    truths=truth,
                                                    predictions=prediction)

    if not args.print_fscore_only:
        print('Truth objs.:\t{0}'.format(len(truth)))
        print('Pred. objs.:\t{0}'.format(len(prediction)))
        print('Aligned objs.:\t{0}'.format(len(alignment_tp_symmetric)))
        print('==============================================')
        print('Recall:\t\t{0:.3f}\nPrecision:\t{1:.3f}\nF-score:\t{2:.3f}'
              ''.format(total_r, total_p, total_f))
        print('')
    else:
        print('{0:.3f}'.format(total_f))
        return

    if args.log_alignment:
        print('==============================================')
        print('Alignments:\n{0}'.format('\n'.join([
            '({0}: {1}) -- ({2}: {3})'.format(truth[t].objid, truth[t].clsname,
                                              prediction[p].objid, prediction[p].clsname)
            for t, p in alignment_tp_symmetric
        ])))
        print('Truth, not aligned:\n{0}'.format(
            '\n'.join(['({0}: {1})'.format(truth[t].objid, truth[t].clsname)
                       for t in truth_not_aligned])))
        print('Preds, not aligned:\n{0}'.format(
            '\n'.join(['({0}: {1})'.format(prediction[p].objid, prediction[p].clsname)
                       for p in preds_not_aligned])))

    ##########################################################################
    # Check if the alignment is a pairing -- find truth objects
    # with more than one prediction aligned to them.
    if args.analyze_alignment:
        t_aln_dict = collections.defaultdict(list)
        for i, j in alignment_tp_symmetric:
            t_aln_dict[i].append(prediction[j])

        multiple_truths = [truth[i] for i in t_aln_dict
                           if len(t_aln_dict[i]) > 1]
        multiple_truths_aln_dict = {t: t_aln_dict[t]
                                    for t in t_aln_dict
                                    if len(t_aln_dict[t]) > 1}

        print('Truth multi-aligned CropObject classes:\n{0}'
              ''.format(pprint.pformat(
                  {(truth[t].objid, truth[t].clsname): [(p.objid, p.clsname)
                                                        for p in t_aln_dict[t]]
                   for t in multiple_truths_aln_dict})))

    ##########################################################################
    # Check if the aligned objects have the same classes

    if args.analyze_clsnames:
        different_clsnames_pairs = []
        for i, j in alignment_tp_symmetric:
            if truth[i].clsname != prediction[j].clsname:
                different_clsnames_pairs.append((truth[i], prediction[j]))

        print('Aligned pairs with different clsnames:\n{0}'
              ''.format('\n'.join(['{0}.{1}\t{2}.{3}'
                                   ''.format(t.objid, t.clsname,
                                             p.objid, p.clsname)
                                   for t, p in different_clsnames_pairs])))

    _end_time = time.clock()
    logging.info('analyze_agreement.py done in {0:.3f} s'.format(_end_time - _start_time))

if __name__ == '__main__':
    parser = build_argument_parser()
    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
    if args.debug:
        logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)

    main(args)