Source code for autopew.io

"""
File operations for autopew. Contains a class to provide an generalized
interface to a series of file types/instruments which can be built upon
to add and register new IO capability.
"""
import inspect
import logging
import sys
from pathlib import Path

import pandas as pd

# Attach a no-op handler to this module's logger and keep a module-level
# reference to it; no output handler or level is configured here.
logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

from . import EPMA, laser

# Names exported by a star-import of this module: the two instrument
# subpackages plus the IO specification classes and the registry helper.
# NOTE(review): get_filehandler is defined in this module but is not listed
# here - confirm whether leaving it out of the public API is intentional.
__all__ = [
    "laser",
    "EPMA",
    "PewIOSpecification",
    "PewCSV",
    "PewSCANCSV",
    "PewJEOLpos",
    "registered_extensions",
]


class PewIOSpecification(object):
    """
    Template for input and output file handlers for autopew. These handlers
    specify functions to import files to pandas DataFrames and the export of
    these filetypes from pandas DataFrames.

    Attributes
    ----------
    extension : :class:`str` | None
        File suffix (including the leading dot) handled by a subclass.
        ``None`` on this base class, which excludes it from
        :func:`registered_extensions`.
    type : None
        Placeholder attribute; it is not assigned a value by this class.
    """

    extension = None
    type = None
    # type handlers for np.array, pd.DataFrame?
    # could add _read, _write, _verify methods which are then customized?

    def __init__(self, *args, **kwargs):
        pass

    @classmethod
    def validate_dataframe(cls, df):
        """
        Validate the output of a file reader against the minimum requirements
        for autopew.

        Parameters
        ----------
        df : :class:`pandas.DataFrame`
            Dataframe to validate.

        Raises
        ------
        AssertionError
            If `df` is not a :class:`pandas.DataFrame`, or if any of the
            columns ``name``, ``x`` or ``y`` is missing.
        """
        # Explicit check rather than an `assert` statement, so the validation
        # is not stripped when Python runs with -O. AssertionError is kept as
        # the exception type for existing callers.
        if not isinstance(df, pd.DataFrame):
            msg = "File reader needs to provide a pandas DataFrame."
            logger.warning(msg)
            raise AssertionError(msg)

        # check columns
        required = ["name", "x", "y"]
        absent = [c for c in required if c not in df.columns]
        if absent:
            msg = "Input dataframe missing required column(s): {}.".format(
                ", ".join(absent)
            )
            logger.warning(msg)
            raise AssertionError(msg)
class PewCSV(PewIOSpecification):
    """
    Handler for ``.csv`` files, read and written through pandas.
    """

    extension = ".csv"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @classmethod
    def read(cls, filepath, **kwargs):
        """
        Read a CSV file into a validated dataframe.

        Parameters
        ----------
        filepath : :class:`str` | :class:`pathlib.Path`
            File to read.
        **kwargs
            Passed through to :func:`pandas.read_csv`.

        Returns
        -------
        :class:`pandas.DataFrame`

        Raises
        ------
        AssertionError
            If the result fails :meth:`validate_dataframe`.
        """
        df = pd.read_csv(filepath, **kwargs)
        cls.validate_dataframe(df)
        return df

    @classmethod
    def write(cls, df, filepath, **kwargs):
        """
        Validate a dataframe and write it to a CSV file.

        Parameters
        ----------
        df : :class:`pandas.DataFrame`
            Dataframe to export.
        filepath : :class:`str` | :class:`pathlib.Path`
            Destination; its suffix is replaced with ``.csv``.
        **kwargs
            Passed through to :meth:`pandas.DataFrame.to_csv`.

        Returns
        -------
        The return value of :meth:`pandas.DataFrame.to_csv`.

        Raises
        ------
        AssertionError
            If `df` fails :meth:`validate_dataframe`.
        """
        cls.validate_dataframe(df)
        # Coerce to Path so plain string paths are accepted as well.
        target = Path(filepath).with_suffix(cls.extension)
        # str for compatibility for Python 3.5
        return df.to_csv(str(target), **kwargs)
class PewSCANCSV(PewIOSpecification):
    """
    Handler for ``.scancsv`` files, delegating to ``laser.chromium``.
    """

    extension = ".scancsv"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @classmethod
    def read(cls, filepath):
        """
        Read a ``.scancsv`` file into a validated dataframe.

        Parameters
        ----------
        filepath : :class:`str` | :class:`pathlib.Path`
            File to read; handed unchanged to
            ``laser.chromium.read_scancsv``.

        Returns
        -------
        :class:`pandas.DataFrame`

        Raises
        ------
        AssertionError
            If the result fails :meth:`validate_dataframe`.
        """
        df = laser.chromium.read_scancsv(filepath)
        cls.validate_dataframe(df)
        return df

    @classmethod
    def write(cls, df, filepath, **kwargs):
        """
        Validate a dataframe and write it to a ``.scancsv`` file.

        Parameters
        ----------
        df : :class:`pandas.DataFrame`
            Dataframe to export.
        filepath : :class:`str` | :class:`pathlib.Path`
            Destination; its suffix is replaced with ``.scancsv``.
        **kwargs
            Passed through to ``laser.chromium.write_scancsv``.

        Returns
        -------
        The return value of ``laser.chromium.write_scancsv``.

        Raises
        ------
        AssertionError
            If `df` fails :meth:`validate_dataframe`.
        """
        cls.validate_dataframe(df)
        # Coerce to Path so plain string paths are accepted as well.
        target = Path(filepath).with_suffix(cls.extension)
        return laser.chromium.write_scancsv(df, target, **kwargs)
class PewJEOLpos(PewIOSpecification):
    """
    Handler for ``.pos`` files, delegating to ``EPMA.JEOL``. Only writing is
    implemented by this class.
    """

    extension = ".pos"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @classmethod
    def write(cls, df, filepath, **kwargs):
        """
        Validate a dataframe and write it to a ``.pos`` file.

        Parameters
        ----------
        df : :class:`pandas.DataFrame`
            Dataframe to export.
        filepath : :class:`str` | :class:`pathlib.Path`
            Destination; its suffix is replaced with ``.pos``.
        **kwargs
            Passed through to ``EPMA.JEOL.write_pos``.

        Returns
        -------
        The return value of ``EPMA.JEOL.write_pos``.

        Raises
        ------
        AssertionError
            If `df` fails :meth:`validate_dataframe`.
        """
        cls.validate_dataframe(df)
        # Coerce to Path so plain string paths are accepted as well.
        target = Path(filepath).with_suffix(cls.extension)
        return EPMA.JEOL.write_pos(df, target, **kwargs)
def registered_extensions():
    """
    Build the registry of IO specifications available in this module.

    Every class found in this module's namespace that subclasses
    :class:`PewIOSpecification` and has a non-``None`` ``extension`` is
    included.

    Returns
    -------
    :class:`dict`
        Mapping of handler class to the file extension it declares.
    """

    def is_io_spec(obj):
        # issubclass() is only reached for objects that are classes
        return inspect.isclass(obj) and issubclass(obj, PewIOSpecification)

    this_module = sys.modules[__name__]
    registry = {}
    for _, handler in inspect.getmembers(this_module, is_io_spec):
        # ignore PewIOSpecification with extension=None
        if handler.extension is None:
            continue
        # keyed by class (not by extension) so that several handlers may
        # declare the same extension
        registry[handler] = handler.extension
    return registry
def get_filehandler(filepath=None, name=None):
    """
    Get a registered file handler for autopew.

    When `name` is given the handler is looked up by class name and
    `filepath` is not consulted; otherwise the handler is selected from the
    file extension of `filepath` (compared in lower case).

    Parameters
    ----------
    filepath : :class:`str` | :class:`pathlib.Path`
        Filename or path to the file you want to read/write.
    name : :class:`str`
        Name of the file handler to use (subclass of
        :class:`PewIOSpecification`).

    Returns
    -------
    handler : :class:`PewIOSpecification`
        The handler class itself (not an instance).

    Raises
    ------
    NotImplementedError
        If neither `filepath` nor `name` is given, or if `filepath` has no
        extension.
    IndexError
        If no handler matches, or if more than one handler is registered for
        the extension and no `name` was given.
    """
    if filepath is None and name is None:
        msg = "Please specify either a filename, handler name or both."
        raise NotImplementedError(msg)

    exts = registered_extensions()

    if name is not None:
        # lookup by name
        handlers = [cls for cls in exts if cls.__name__ == name]
        if not handlers:
            msg = (
                "PewIOSpec {} not found in registered handlers. "
                "Check the docs for valid handlers.".format(name)
            )
            raise IndexError(msg)
        return handlers[0]

    # lookup by file only
    ext = Path(filepath).suffix
    if not ext:
        raise NotImplementedError("No extension found for file {}.".format(filepath))

    wanted = ext.lower()
    matches = [cls for cls, registered in exts.items() if registered == wanted]
    if not matches:
        # report the extension that failed to match (name is None here)
        msg = (
            "Unrecognised file extension {}. "
            "Check the docs for valid handlers.".format(ext)
        )
        raise IndexError(msg)
    if len(matches) > 1:
        msg = (
            "Multiple handlers found for extension {} - "
            "You'll need to specify the handler name.".format(ext)
        )
        raise IndexError(msg)

    handler = matches[0]
    logger.debug("Handler found for {}: {}".format(ext, handler))
    return handler