Source code for scripts.analyze_annotations

#!/usr/bin/env python
"""``analyze_annotations.py`` is a script that analyzes annotation results.

For an overview of command-line options, call::

  analyze_annotations.py -h


Functionality
-------------

* Count symbols
* Count symbol classes
* Compute symbol parameters per class (size, morphological features..?) [NOT IMPLEMENTED]

* Count relationships
* Count relationship classes
* Compute relationship parameters per class pair

"""
from __future__ import print_function, unicode_literals
from __future__ import division
import argparse
import collections
import json
import logging
import pprint
import time

import operator

from muscima.io import parse_cropobject_list, export_cropobject_graph
from muscima.cropobject import merge_cropobject_lists

__version__ = "0.0.1"
__author__ = "Jan Hajic jr."


def compute_cropobject_stats(cropobjects, edges=None):
    """Count CropObjects and, optionally, the relationships between them.

    :param cropobjects: A sized collection of objects that expose
        a ``clsname`` attribute (and an ``objid`` attribute, which is only
        read when ``edges`` are given).

    :param edges: Optional sized collection of ``(from_objid, to_objid)``
        pairs. Both objids of each pair are looked up among the given
        ``cropobjects``; an unknown objid raises ``KeyError``. If ``None``,
        no relationship statistics are computed.

    :returns: An ``OrderedDict`` with the keys ``n_cropobjects``,
        ``n_cropobjects_by_class`` (class name -> count) and
        ``n_cropobjects_distinct``. When ``edges`` are given, it also has
        ``n_relationships``, ``n_relationships_by_class``
        (``(from_clsname, to_clsname)`` -> count)
        and ``n_relationships_distinct``.
    """
    # Histogram of object classes.
    class_counts = collections.defaultdict(int)
    for cropobject in cropobjects:
        class_counts[cropobject.clsname] += 1

    stats = collections.OrderedDict([
        ('n_cropobjects', len(cropobjects)),
        ('n_cropobjects_by_class', class_counts),
        ('n_cropobjects_distinct', len(class_counts)),
    ])
    if edges is None:
        return stats

    # Relationships are keyed by the classes of their two endpoints, so we
    # need to get from an objid to the class name of the object it denotes.
    clsname_by_objid = {c.objid: c.clsname for c in cropobjects}

    stats['n_relationships'] = len(edges)
    class_pair_counts = collections.defaultdict(int)
    for source_objid, target_objid in edges:
        class_pair = (clsname_by_objid[source_objid],
                      clsname_by_objid[target_objid])
        class_pair_counts[class_pair] += 1

    stats['n_relationships_by_class'] = class_pair_counts
    stats['n_relationships_distinct'] = len(class_pair_counts)
    return stats
def emit_stats_pprint(stats):
    """Pretty-print the statistics to standard output.

    Only the known statistics are printed, in a fixed order, and only
    those that are actually present in ``stats``. The per-class histograms
    are turned into lists of ``(key, count)`` pairs sorted by descending
    count, so that the most frequent classes come first.

    :param stats: A dict-like object such as the one returned
        by ``compute_cropobject_stats()``.
    """
    # (key in stats, is the value a histogram that should be sorted?)
    report_layout = (
        ('n_cropobjects', False),
        ('n_cropobjects_by_class', True),
        ('n_cropobjects_distinct', False),
        ('n_relationships', False),
        ('n_relationships_by_class', True),
        ('n_relationships_distinct', False),
    )

    report = []
    for key, is_histogram in report_layout:
        if key not in stats:
            continue
        value = stats[key]
        if is_histogram:
            value = sorted(value.items(),
                           key=operator.itemgetter(1),
                           reverse=True)
        report.append((key, value))

    pprint.pprint(report)
##############################################################################
def build_argument_parser():
    """Build the command-line interface of the script.

    The module docstring is used verbatim as the description of the parser.

    :returns: An ``argparse.ArgumentParser`` that understands the options
        ``-i/--input`` (required, one or more values), ``-e/--emit``
        (one of ``print``, ``latex``, ``json``; defaults to ``print``),
        ``-v/--verbose`` and ``--debug``.
    """
    arg_parser = argparse.ArgumentParser(
        description=__doc__,
        add_help=True,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Each entry: (option strings, keyword arguments for add_argument()).
    option_table = [
        (('-i', '--input'),
         dict(action='store', nargs='+', required=True,
              help='List of input CropObjectList files.')),
        (('-e', '--emit'),
         dict(action='store', default='print',
              choices=['print', 'latex', 'json'],
              help='How should the analysis results be presented?')),
        (('-v', '--verbose'),
         dict(action='store_true',
              help='Turn on INFO messages.')),
        (('--debug',),
         dict(action='store_true',
              help='Turn on DEBUG messages.')),
    ]
    for option_strings, settings in option_table:
        arg_parser.add_argument(*option_strings, **settings)

    return arg_parser
def main(args):
    """Parse the input CropObject lists, compute their statistics and emit them.

    :param args: The parsed command-line arguments. ``args.input`` is
        iterated as the list of CropObjectList files to parse,
        and ``args.emit`` selects how the statistics are presented.
        Only the ``print`` mode is implemented; any other mode logs a warning
        and produces no output.
    """
    # time.clock() was removed in Python 3.8. Prefer time.perf_counter()
    # (available since Python 3.3) and fall back to time.clock() only on
    # interpreters that do not have it, i.e. Python 2.
    try:
        timer = time.perf_counter
    except AttributeError:
        timer = time.clock

    logging.info('Starting main...')
    _start_time = timer()

    # Parse individual CropObject lists.
    cropobject_lists = []
    _n_parsed_cropobjects = 0
    for i, f in enumerate(args.input):
        cs = parse_cropobject_list(f)
        cropobject_lists.append(cs)

        # Logging progress: report after every 10th file (except the first).
        _n_parsed_cropobjects += len(cs)
        if i % 10 == 0 and i > 0:
            _time_parsing = timer() - _start_time
            # A coarse timer may report zero elapsed time; do not let
            # a progress message crash the analysis.
            if _time_parsing > 0:
                _cropobjects_per_second = _n_parsed_cropobjects / _time_parsing
            else:
                _cropobjects_per_second = float('inf')
            logging.info('Parsed {0} cropobjects in {1:.2f} s ({2:.2f} objs/s)'
                         ''.format(_n_parsed_cropobjects,
                                   _time_parsing,
                                   _cropobjects_per_second))

    # Merge the CropObject lists into one.
    # This is done so that the resulting object graph can be manipulated
    # at once, without objid clashes.
    cropobjects = merge_cropobject_lists(*cropobject_lists)

    edges = export_cropobject_graph(cropobjects)

    _parse_end_time = timer()
    logging.info('Parsing took {0:.2f} s'.format(_parse_end_time - _start_time))

    ##########################################################################
    # Analysis

    # Here's where the results are stored, for export into various
    # formats. (Currently, we only print them.)
    stats = compute_cropobject_stats(cropobjects, edges=edges)

    ##########################################################################
    # Export
    if args.emit == 'print':
        emit_stats_pprint(stats)
    else:
        # The parser also accepts 'json' and 'latex', but these export
        # options are not implemented yet. Say so instead of silently
        # producing no output.
        logging.warning('Emit mode "{0}" is not implemented yet;'
                        ' no output was produced.'.format(args.emit))

    _end_time = timer()
    logging.info('analyze_annotations.py done in {0:.3f} s'
                 ''.format(_end_time - _start_time))
if __name__ == '__main__':
    parser = build_argument_parser()
    args = parser.parse_args()

    # logging.basicConfig() does nothing once the root logger already has
    # a handler, so the logging level has to be chosen in a single call.
    # --debug is the more detailed setting and therefore takes precedence
    # when both --verbose and --debug are given.
    if args.debug:
        logging.basicConfig(format='%(levelname)s: %(message)s',
                            level=logging.DEBUG)
    elif args.verbose:
        logging.basicConfig(format='%(levelname)s: %(message)s',
                            level=logging.INFO)

    main(args)