Source code for ocrd_utils.os

"""
Operating system functions.
"""
# Names exported by ``from ocrd_utils.os import *``.
# NOTE(review): ``list_all_resources`` and ``AtomicWriterPerms`` are defined in
# this module but are not listed here -- confirm that this is intentional.
__all__ = [
    'abspath',
    'pushd_popd',
    'unzip_file_to_dir',
    'list_resource_candidates',
    'atomic_write',
]

from tempfile import TemporaryDirectory
import contextlib
from os import getcwd, chdir, stat, chmod, umask, environ, scandir
from pathlib import Path
from os.path import exists, abspath as abspath_, join, isdir
from zipfile import ZipFile

from atomicwrites import atomic_write as atomic_write_, AtomicWriter

from .constants import XDG_DATA_HOME

def abspath(url):
    """
    Resolve a local path or ``file://`` URL to an absolute filesystem path.

    A leading ``file://`` scheme is stripped first; the remainder is passed to
    ``os.path.abspath``.

    Args:
        url (str): Filesystem path, optionally prefixed with ``file://``.

    Returns:
        str: The absolute, normalized path.
    """
    scheme = 'file://'
    path = url[len(scheme):] if url.startswith(scheme) else url
    return abspath_(path)
@contextlib.contextmanager
def pushd_popd(newcwd=None, tempdir=False):
    """
    Temporarily change the working directory, restoring it on exit.

    Args:
        newcwd (str): Directory to change into. If falsy (and ``tempdir`` is
            not set), the working directory is left unchanged.
        tempdir (bool): If true, create a ``TemporaryDirectory``, change into
            it and remove it again when the context exits.

    Yields:
        The path of the temporary directory when ``tempdir`` is set,
        otherwise ``newcwd`` exactly as passed in (possibly ``None``).

    Raises:
        Exception: If both ``newcwd`` and ``tempdir`` are given.
    """
    if newcwd and tempdir:
        raise Exception("pushd_popd can accept either newcwd or tempdir, not both")

    try:
        previous_dir = getcwd()
    except FileNotFoundError:
        # The directory we started in no longer exists (e.g. it was deleted
        # before this context was entered), so fall back to a fixed location.
        previous_dir = '/tmp'

    try:
        if not tempdir:
            if newcwd:
                chdir(newcwd)
            yield newcwd
        else:
            with TemporaryDirectory() as scratch_dir:
                chdir(scratch_dir)
                yield scratch_dir
    finally:
        # Always return to where we came from, even if the body raised.
        chdir(previous_dir)
def unzip_file_to_dir(path_to_zip, output_directory):
    """
    Extract a ZIP archive to a directory

    Args:
        path_to_zip (str): Path of the ZIP archive to read.
        output_directory (str): Directory into which all members of the
            archive are extracted.

    Raises:
        Whatever ``zipfile.ZipFile`` or ``ZipFile.extractall`` raise, e.g. for
        a missing or corrupt archive.
    """
    # Use the archive as a context manager so that the underlying file handle
    # is closed even when extraction fails part-way through.
    with ZipFile(path_to_zip, 'r') as archive:
        archive.extractall(output_directory)
def list_resource_candidates(executable, fname, cwd=None, is_file=False, is_dir=False):
    """
    Generate candidates for processor resources according to
    https://ocr-d.de/en/spec/ocrd_tool#file-parameters (except python-bundled)

    Candidates are produced in this order:

    1. ``<cwd>/<fname>``
    2. ``<dir>/<fname>`` for every entry of the colon-separated environment
       variable ``<EXECUTABLE>_PATH`` (executable name upper-cased, ``-``
       replaced by ``_``), if that variable is set
    3. ``<XDG_DATA_HOME>/ocrd-resources/<executable>/<fname>``
    4. ``/usr/local/share/ocrd-resources/<executable>/<fname>``

    Args:
        executable (str): Name of the processor executable.
        fname (str): Name of the resource to look for.
        cwd (str): Directory used for the first candidate. Defaults to the
            working directory at the time of the call.
        is_file (bool): Keep only candidates that are existing files.
        is_dir (bool): Keep only candidates that are existing directories.

    Returns:
        list: Candidate paths (str), in the order given above.
    """
    if cwd is None:
        # Resolve lazily: a ``cwd=getcwd()`` default is evaluated only once,
        # at import time, and would go stale after any later chdir().
        cwd = getcwd()
    candidates = [join(cwd, fname)]
    processor_path_var = '%s_PATH' % executable.replace('-', '_').upper()
    if processor_path_var in environ:
        candidates += [join(x, fname) for x in environ[processor_path_var].split(':')]
    candidates.append(join(XDG_DATA_HOME, 'ocrd-resources', executable, fname))
    candidates.append(join('/usr/local/share/ocrd-resources', executable, fname))
    if is_file:
        candidates = [c for c in candidates if Path(c).is_file()]
    if is_dir:
        candidates = [c for c in candidates if Path(c).is_dir()]
    return candidates
def list_all_resources(executable):
    """
    List all processor resources in the filesystem according to
    https://ocr-d.de/en/spec/ocrd_tool#file-parameters (except python-bundled)

    The directories searched are, in order: every entry of the colon-separated
    environment variable ``<EXECUTABLE>_PATH`` (if set), then
    ``<XDG_DATA_HOME>/ocrd-resources/<executable>`` and finally
    ``/usr/local/share/ocrd-resources/<executable>``. Locations that are not
    existing directories are skipped.

    Args:
        executable (str): Name of the processor executable.

    Returns:
        list: Paths (str) of all direct entries of the searched directories.
    """
    # XXX cwd would list too many false positives, so
    # <cwd>/ocrd-resources/<executable> is deliberately not searched.
    search_dirs = []
    env_key = '%s_PATH' % executable.replace('-', '_').upper()
    if env_key in environ:
        search_dirs.extend(environ[env_key].split(':'))
    search_dirs.append(join(XDG_DATA_HOME, 'ocrd-resources', executable))
    search_dirs.append(join('/usr/local/share/ocrd-resources', executable))

    found = []
    for directory in search_dirs:
        if isdir(directory):
            found.extend(entry.path for entry in scandir(directory))
    return found


# ht @pabs3
# https://github.com/untitaker/python-atomicwrites/issues/42
class AtomicWriterPerms(AtomicWriter):
    """
    ``AtomicWriter`` variant that sets permission bits on the file object it
    hands out: those of the target path if it already exists, otherwise
    ``0o664`` reduced by the process umask.
    """

    def get_fileobject(self, **kwargs):
        """
        Return the file object created by the parent class after applying the
        permission bits described on the class to its file descriptor.
        """
        handle = super().get_fileobject(**kwargs)
        try:
            perms = stat(self._path).st_mode
        except FileNotFoundError:
            # Creating a new file, emulate what os.open() does.
            # umask() can only be read by setting it, so set and restore.
            current_umask = umask(0)
            umask(current_umask)
            perms = 0o664 & ~current_umask
        chmod(handle.fileno(), perms)
        return handle
@contextlib.contextmanager
def atomic_write(fpath):
    """
    Context manager wrapping ``atomicwrites.atomic_write`` for ``fpath``.

    The underlying call is made with ``overwrite=True`` and
    ``AtomicWriterPerms`` as the writer class.

    Args:
        fpath (str): Path of the file to write.

    Yields:
        The file object provided by ``atomicwrites.atomic_write``.
    """
    writer = atomic_write_(fpath, writer_cls=AtomicWriterPerms, overwrite=True)
    with writer as handle:
        yield handle