# Source code for scripts.get_images_from_muscima

#!/usr/bin/env python
"""``get_images_from_muscima.py`` is a script that copies out the images
for which MUSCIMA++ provides symbol annotations from a download
of the CVC-MUSCIMA staff removal dataset.

You have to download this dataset first and provide a path to its
root directory (meaning the directory which contains subdirs for
the individual CVC-MUSCIMA distortions)
to this script. Either supply it directly using the ``-r`` option,
or set the ``CVC_MUSCIMA_ROOT`` environment variable.

Example invocation::

    get_images_from_muscima.py -o ./images -i 4:10 17:8 5:12 21:10 34:3

MUSCIMA++ 0.9 provides a file with the writer:number pairs for its 140
annotated images in this format, which you can feed to the script
with::

    get_images_from_muscima.py [...] -i `cat path/to/MUSCIMA++/specifications/cvc-muscima-image-list.txt`

For an overview of all command-line options, call::

  get_images_from_muscima.py -h

"""
from __future__ import print_function, unicode_literals
from builtins import zip
import argparse
import logging
import os
import time

import shutil

import muscima.dataset

__version__ = "0.0.1"
__author__ = "Jan Hajic jr."


def build_argument_parser():
    """Build the command-line argument parser for this script.

    The module docstring is used verbatim as the program description,
    which is why the raw-description formatter is selected.

    :returns: The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        add_help=True,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-r', '--root', action='store',
                        default=muscima.dataset.CVC_MUSCIMA_ROOT,
                        help='CVC-MUSCIMA dataset root directory (should'
                             ' contain subdirectories named after the'
                             ' CVC-MUSCIMA distortions).')
    # main() cannot do anything useful without an output directory and
    # at least one item, so let argparse report their absence instead of
    # failing later with a TypeError on None.
    parser.add_argument('-o', '--outdir', action='store', required=True,
                        help='Output directory for the copied files.'
                             ' If it does not exist, it will be created.')
    parser.add_argument('-i', '--items', action='store', nargs='+',
                        required=True,
                        help='A list of writer:page pairs, such as 22:4.')
    # The example in the help text must agree with the default format and
    # with the writer:page ordering of the items: 4:22 is writer 4, page 22.
    parser.add_argument('-f', '--format', action='store',
                        default='CVC-MUSCIMA_W-{w:02}_N-{n:02}_D-ideal',
                        help='The desired output filenames. {w} and {n}'
                             ' stand for writer and page number: for'
                             ' item 4:22, for instance, the filename'
                             ' would be CVC-MUSCIMA_W-04_N-22_D-ideal.png'
                             ' (the *.png suffix is retained from the'
                             ' corresponding CVC-MUSCIMA file).')
    parser.add_argument('-m', '--mode', action='store', default='symbol',
                        help="The CVC-MUSCIMA image mode: 'full', 'symbol',"
                             " or 'staff_only'.")

    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Turn on INFO messages.')
    parser.add_argument('--debug', action='store_true',
                        help='Turn on DEBUG messages.')

    return parser
def _parse_item(item):
    """Split one ``writer:page`` string into a ``(writer, page)`` int pair.

    :raises ValueError: If ``item`` is not exactly two integers separated
        by a colon.
    """
    try:
        writer, page = item.split(':')
        return int(writer), int(page)
    except ValueError:
        raise ValueError('Invalid item {0!r}: expected a writer:page pair'
                         ' of integers, such as 22:4.'.format(item))


def main(args):
    """Copy the requested CVC-MUSCIMA images into the output directory.

    :param args: Parsed command-line arguments. The attributes read here
        are ``outdir``, ``root``, ``items``, ``format`` and ``mode``.

    :raises ValueError: If one of ``args.items`` is not a valid
        ``writer:page`` pair. All items are validated before any file
        is copied.
    """
    logging.info('Starting main...')
    # time.clock() was removed in Python 3.8; prefer perf_counter() and
    # fall back to time.time() on interpreters that do not have it.
    timer = getattr(time, 'perf_counter', time.time)
    _start_time = timer()

    # makedirs() also creates missing parent directories, which a plain
    # mkdir() would refuse to do.
    if not os.path.isdir(args.outdir):
        os.makedirs(args.outdir)

    dataset = muscima.dataset.CVC_MUSCIMA(root=args.root)

    # Parse everything up front so that a malformed item aborts the run
    # before a partial set of files has been copied.
    writers_and_pages = [_parse_item(item) for item in args.items]

    for w, p in writers_and_pages:
        imfile = dataset.imfile(page=p, writer=w, distortion='ideal',
                                mode=args.mode)

        # Format the filename, keeping the extension of the source image
        _, out_ext = os.path.splitext(imfile)
        out_fname = args.format.format(w=w, n=p) + out_ext
        out_file = os.path.join(args.outdir, out_fname)

        # Copy the file
        shutil.copyfile(imfile, out_file)

    _end_time = timer()
    logging.info('get_images_from_muscima.py done in {0:.3f} s'
                 ''.format(_end_time - _start_time))
if __name__ == '__main__':
    parser = build_argument_parser()
    args = parser.parse_args()

    # logging.basicConfig() only has an effect the first time it is called,
    # so the level has to be decided before calling it. --debug is the more
    # detailed setting and therefore takes precedence over --verbose.
    if args.debug:
        logging.basicConfig(format='%(levelname)s: %(message)s',
                            level=logging.DEBUG)
    elif args.verbose:
        logging.basicConfig(format='%(levelname)s: %(message)s',
                            level=logging.INFO)

    main(args)