Chris@87: """A file interface for handling local and remote data files.
Chris@87: 
Chris@87: The goal of datasource is to abstract some of the file system operations
Chris@87: when dealing with data files so the researcher doesn't have to know all the
Chris@87: low-level details.  Through datasource, a researcher can obtain and use a
Chris@87: file with one function call, regardless of location of the file.
Chris@87: 
Chris@87: DataSource is meant to augment standard python libraries, not replace them.
Chris@87: It should work seamlessly with standard file IO operations and the os
Chris@87: module.
Chris@87: 
Chris@87: DataSource files can originate locally or remotely:
Chris@87: 
Chris@87: - local files : '/home/guido/src/local/data.txt'
Chris@87: - URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt'
Chris@87: 
Chris@87: DataSource files can also be compressed or uncompressed.  Currently only
Chris@87: gzip and bz2 are supported.
Chris@87: 
Chris@87: Example::
Chris@87: 
Chris@87:     >>> # Create a DataSource, use os.curdir (default) for local storage.
Chris@87:     >>> ds = datasource.DataSource()
Chris@87:     >>>
Chris@87:     >>> # Open a remote file.
Chris@87:     >>> # DataSource downloads the file, stores it locally in:
Chris@87:     >>> #     './www.google.com/index.html'
Chris@87:     >>> # opens the file and returns a file object.
Chris@87:     >>> fp = ds.open('http://www.google.com/index.html')
Chris@87:     >>>
Chris@87:     >>> # Use the file as you normally would
Chris@87:     >>> fp.read()
Chris@87:     >>> fp.close()
Chris@87: 
Chris@87: """
Chris@87: from __future__ import division, absolute_import, print_function
Chris@87: 
Chris@87: import os
Chris@87: import sys
Chris@87: import shutil
Chris@87: 
# Keep a handle on the builtin ``open``: this module defines its own ``open``
# function further down, which rebinds the module-level name.
_open = open
Chris@87: 
Chris@87: 
Chris@87: # Using a class instead of a module-level dictionary
Chris@87: # to reduce the initial 'import numpy' overhead by
Chris@87: # deferring the import of bz2 and gzip until needed
Chris@87: 
Chris@87: # TODO: .zip support, .tar support?
Chris@87: class _FileOpeners(object):
Chris@87:     """
Chris@87:     Container for different methods to open (un-)compressed files.
Chris@87: 
Chris@87:     `_FileOpeners` contains a dictionary that holds one method for each
Chris@87:     supported file format. Attribute lookup is implemented in such a way
Chris@87:     that an instance of `_FileOpeners` itself can be indexed with the keys
Chris@87:     of that dictionary. Currently uncompressed files as well as files
Chris@87:     compressed with ``gzip`` or ``bz2`` compression are supported.
Chris@87: 
Chris@87:     Notes
Chris@87:     -----
Chris@87:     `_file_openers`, an instance of `_FileOpeners`, is made available for
Chris@87:     use in the `_datasource` module.
Chris@87: 
Chris@87:     Examples
Chris@87:     --------
Chris@87:     >>> np.lib._datasource._file_openers.keys()
Chris@87:     [None, '.bz2', '.gz']
Chris@87:     >>> np.lib._datasource._file_openers['.gz'] is gzip.open
Chris@87:     True
Chris@87: 
Chris@87:     """
Chris@87: 
Chris@87:     def __init__(self):
Chris@87:         self._loaded = False
Chris@87:         self._file_openers = {None: open}
Chris@87: 
Chris@87:     def _load(self):
Chris@87:         if self._loaded:
Chris@87:             return
Chris@87:         try:
Chris@87:             import bz2
Chris@87:             self._file_openers[".bz2"] = bz2.BZ2File
Chris@87:         except ImportError:
Chris@87:             pass
Chris@87:         try:
Chris@87:             import gzip
Chris@87:             self._file_openers[".gz"] = gzip.open
Chris@87:         except ImportError:
Chris@87:             pass
Chris@87:         self._loaded = True
Chris@87: 
Chris@87:     def keys(self):
Chris@87:         """
Chris@87:         Return the keys of currently supported file openers.
Chris@87: 
Chris@87:         Parameters
Chris@87:         ----------
Chris@87:         None
Chris@87: 
Chris@87:         Returns
Chris@87:         -------
Chris@87:         keys : list
Chris@87:             The keys are None for uncompressed files and the file extension
Chris@87:             strings (i.e. ``'.gz'``, ``'.bz2'``) for supported compression
Chris@87:             methods.
Chris@87: 
Chris@87:         """
Chris@87:         self._load()
Chris@87:         return list(self._file_openers.keys())
Chris@87: 
Chris@87:     def __getitem__(self, key):
Chris@87:         self._load()
Chris@87:         return self._file_openers[key]
Chris@87: 
# Module-level registry of file openers, keyed by compressed-file extension
# (``None`` for uncompressed files).  It is instantiated before this module's
# own ``open`` function is defined, so the ``None`` entry registered in
# ``_FileOpeners.__init__`` is the builtin ``open``.
_file_openers = _FileOpeners()
Chris@87: 
def open(path, mode='r', destpath=os.curdir):
    """
    Open `path` with `mode` and return the file object.

    Convenience wrapper: a `DataSource` rooted at `destpath` is created
    and its ``open`` method is called with `path` and `mode`.  If `path`
    is a URL it is downloaded into the `destpath` directory and opened
    from there.

    Parameters
    ----------
    path : str
        Local file path or URL to open.
    mode : str, optional
        Mode to open `path`. Mode 'r' for reading, 'w' for writing, 'a' to
        append. Available modes depend on the type of object specified by
        path.  Default is 'r'.
    destpath : str, optional
        Path to the directory where the source file gets downloaded to for
        use.  If `destpath` is None, a temporary directory will be created.
        The default path is the current directory.

    Returns
    -------
    out : file object
        The opened file.

    """
    return DataSource(destpath).open(path, mode)
Chris@87: 
Chris@87: 
class DataSource(object):
    """
    DataSource(destpath='.')

    A generic data source file (file, http, ftp, ...).

    DataSources can be local files or remote files/URLs.  The files may
    also be compressed or uncompressed. DataSource hides some of the
    low-level details of downloading the file, allowing you to simply pass
    in a valid file path (or URL) and obtain a file object.

    Parameters
    ----------
    destpath : str or None, optional
        Path to the directory where the source file gets downloaded to for
        use.  If `destpath` is None, a temporary directory will be created.
        The default path is the current directory.

    Notes
    -----
    URLs require a scheme string (``http://``) to be used, without it they
    will fail::

        >>> repos = DataSource()
        >>> repos.exists('www.google.com/index.html')
        False
        >>> repos.exists('http://www.google.com/index.html')
        True

    Temporary directories are deleted when the DataSource is deleted.

    Examples
    --------
    ::

        >>> ds = DataSource('/home/guido')
        >>> urlname = 'http://www.google.com/index.html'
        >>> gfile = ds.open('http://www.google.com/index.html')  # remote file
        >>> ds.abspath(urlname)
        '/home/guido/www.google.com/index.html'

        >>> ds = DataSource(None)  # use with temporary file
        >>> ds.open('/home/guido/foobar.txt')
        <open file '/home/guido/foobar.txt', mode 'r' at 0x91d4430>
        >>> ds.abspath('/home/guido/foobar.txt')
        '/tmp/tmpy4pgsP/home/guido/foobar.txt'

    """

    def __init__(self, destpath=os.curdir):
        """Create a DataSource with a local path at destpath."""
        if destpath:
            self._destpath = os.path.abspath(destpath)
            self._istmpdest = False
        else:
            import tempfile  # deferring import to improve startup time
            self._destpath = tempfile.mkdtemp()
            self._istmpdest = True

    def __del__(self):
        # Remove temp directories.  ``__del__`` also runs for instances
        # whose ``__init__`` raised before ``_istmpdest`` was assigned, so
        # the attribute is looked up defensively.
        if getattr(self, '_istmpdest', False):
            shutil.rmtree(self._destpath)

    def _iszip(self, filename):
        """Test if the filename is a zip file by looking at the file extension.

        """
        fname, ext = os.path.splitext(filename)
        return ext in _file_openers.keys()

    def _iswritemode(self, mode):
        """Test if the given mode will open a file for writing."""

        # Currently only used to test the bz2 files.
        _writemodes = ("w", "+")
        return any(c in _writemodes for c in mode)

    def _splitzipext(self, filename):
        """Split zip extension from filename and return filename.

        *Returns*:
            base, zip_ext : {tuple}
                ``zip_ext`` is None when `filename` has no supported
                compression extension.

        """

        if self._iszip(filename):
            return os.path.splitext(filename)
        else:
            return filename, None

    def _possible_names(self, filename):
        """Return a list containing compressed filename variations.

        The list always starts with `filename` itself.  Unless `filename`
        already carries a supported compression extension, one entry per
        supported extension (e.g. ``filename + '.gz'``) is appended.

        """
        names = [filename]
        if not self._iszip(filename):
            for zipext in _file_openers.keys():
                # The ``None`` key stands for "uncompressed"; skip it.
                if zipext:
                    names.append(filename+zipext)
        return names

    def _isurl(self, path):
        """Test if path is a net location.  Tests the scheme and netloc."""

        # We do this here to reduce the 'import numpy' initial import time.
        if sys.version_info[0] >= 3:
            from urllib.parse import urlparse
        else:
            from urlparse import urlparse

        # BUG : URLs require a scheme string ('http://') to be used.
        #       www.google.com will fail.
        #       Should we prepend the scheme for those that don't have it and
        #       test that also?  Similar to the way we append .gz and test
        #       for compressed versions of files.

        scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path)
        return bool(scheme and netloc)

    def _cache(self, path):
        """Cache the file specified by path.

        Creates a copy of the file in the datasource cache and returns the
        path of that copy.  URLs are downloaded; local files are copied.

        Raises
        ------
        URLError
            If `path` is a URL and it cannot be opened or read.

        """
        # We import these here because importing urllib2 is slow and
        # a significant fraction of numpy's total import time.
        if sys.version_info[0] >= 3:
            from urllib.request import urlopen
            from urllib.error import URLError
        else:
            from urllib2 import urlopen
            from urllib2 import URLError

        upath = self.abspath(path)

        # ensure directory exists
        cache_dir = os.path.dirname(upath)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)

        # TODO: Doesn't handle compressed files!
        if self._isurl(path):
            try:
                openedurl = urlopen(path)
                # Nested try/finally so the URL handle is released even if
                # the local file cannot be opened or closing it fails.
                try:
                    f = _open(upath, 'wb')
                    try:
                        shutil.copyfileobj(openedurl, f)
                    finally:
                        f.close()
                finally:
                    openedurl.close()
            except URLError:
                raise URLError("URL not found: %s" % path)
        else:
            shutil.copyfile(path, upath)
        return upath

    def _findfile(self, path):
        """Searches for ``path`` and returns full path if found.

        If path is an URL, _findfile will cache a local copy and return the
        path to the cached file.  If path is a local file, _findfile will
        return a path to that local file.

        The search will include possible compressed versions of the file
        and return the first occurrence found.  Returns None when nothing
        matches.

        """

        # Build list of possible local file paths
        if not self._isurl(path):
            # Valid local paths
            filelist = self._possible_names(path)
            # Paths in self._destpath
            filelist += self._possible_names(self.abspath(path))
        else:
            # Cached URLs in self._destpath
            filelist = self._possible_names(self.abspath(path))
            # Remote URLs
            filelist = filelist + self._possible_names(path)

        for name in filelist:
            if self.exists(name):
                if self._isurl(name):
                    name = self._cache(name)
                return name
        return None

    def abspath(self, path):
        """
        Return absolute path of file in the DataSource directory.

        If `path` is an URL, then `abspath` will return either the location
        the file exists locally or the location it would exist when opened
        using the `open` method.

        Parameters
        ----------
        path : str
            Can be a local file or a remote URL.

        Returns
        -------
        out : str
            Complete path, including the `DataSource` destination directory.

        Notes
        -----
        The functionality is based on `os.path.abspath`.

        """
        # We do this here to reduce the 'import numpy' initial import time.
        if sys.version_info[0] >= 3:
            from urllib.parse import urlparse
        else:
            from urlparse import urlparse

        # TODO:  This should be more robust.  Handles case where path includes
        #        the destpath, but not other sub-paths. Failing case:
        #        path = /home/guido/datafile.txt
        #        destpath = /home/alex/
        #        upath = self.abspath(path)
        #        upath == '/home/alex/home/guido/datafile.txt'

        # handle case where path includes self._destpath
        splitpath = path.split(self._destpath, 2)
        if len(splitpath) > 1:
            path = splitpath[1]
        scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path)
        netloc = self._sanitize_relative_path(netloc)
        upath = self._sanitize_relative_path(upath)
        return os.path.join(self._destpath, netloc, upath)

    def _sanitize_relative_path(self, path):
        """Return a sanitised relative path for which
        os.path.abspath(os.path.join(base, path)).startswith(base)
        """
        last = None
        path = os.path.normpath(path)
        # Repeat until a full pass leaves the path unchanged.
        while path != last:
            last = path
            # Note: os.path.join treats '/' as os.sep on Windows
            path = path.lstrip(os.sep).lstrip('/')
            # NOTE: str.lstrip takes a set of characters, so this removes
            # every leading '.', not only a leading '..' component.
            path = path.lstrip(os.pardir).lstrip('..')
            drive, path = os.path.splitdrive(path)  # for Windows
        return path

    def exists(self, path):
        """
        Test if path exists.

        Test if `path` exists as (and in this order):

        - a local file.
        - a remote URL that has been downloaded and stored locally in the
          `DataSource` directory.
        - a remote URL that has not been downloaded, but is valid and
          accessible.

        Parameters
        ----------
        path : str
            Can be a local file or a remote URL.

        Returns
        -------
        out : bool
            True if `path` exists.

        Notes
        -----
        When `path` is an URL, `exists` will return True if it's either
        stored locally in the `DataSource` directory, or is a valid remote
        URL.  `DataSource` does not discriminate between the two, the file
        is accessible if it exists in either location.

        """
        # We import this here because importing urllib2 is slow and
        # a significant fraction of numpy's total import time.
        if sys.version_info[0] >= 3:
            from urllib.request import urlopen
            from urllib.error import URLError
        else:
            from urllib2 import urlopen
            from urllib2 import URLError

        # Test local path
        if os.path.exists(path):
            return True

        # Test cached url
        upath = self.abspath(path)
        if os.path.exists(upath):
            return True

        # Test remote url
        if self._isurl(path):
            try:
                netfile = urlopen(path)
                netfile.close()
                return True
            except URLError:
                return False
        return False

    def open(self, path, mode='r'):
        """
        Open and return file-like object.

        If `path` is an URL, it will be downloaded, stored in the
        `DataSource` directory and opened from there.

        Parameters
        ----------
        path : str
            Local file path or URL to open.
        mode : {'r', 'w', 'a'}, optional
            Mode to open `path`.  Mode 'r' for reading, 'w' for writing,
            'a' to append. Available modes depend on the type of object
            specified by `path`. Default is 'r'.

        Returns
        -------
        out : file object
            File object.

        Raises
        ------
        ValueError
            If `path` is a URL and `mode` is a write mode.
        IOError
            If `path` (or a compressed variant of it) cannot be found.

        """

        # TODO: There is no support for opening a file for writing which
        #       doesn't exist yet (creating a file).  Should there be?

        # TODO: Add a ``subdir`` parameter for specifying the subdirectory
        #       used to store URLs in self._destpath.

        if self._isurl(path) and self._iswritemode(mode):
            raise ValueError("URLs are not writeable")

        # NOTE: _findfile will fail on a new file opened for writing.
        found = self._findfile(path)
        if not found:
            raise IOError("%s not found." % path)

        _fname, ext = self._splitzipext(found)
        if ext == '.bz2':
            # BZ2File does not accept update ('+') modes; str.replace
            # returns a new string, so the result must be rebound.
            mode = mode.replace("+", "")
        return _file_openers[ext](found, mode=mode)
Chris@87: 
Chris@87: 
class Repository(DataSource):
    """
    Repository(baseurl, destpath='.')

    A data repository where multiple DataSource's share a base
    URL/directory.

    `Repository` extends `DataSource` by prepending a base URL (or
    directory) to all the files it handles. Use `Repository` when you will
    be working with multiple files from one base URL.  Initialize
    `Repository` with the base URL, then refer to each file by its filename
    only.

    Parameters
    ----------
    baseurl : str
        Path to the local directory or remote location that contains the
        data files.
    destpath : str or None, optional
        Path to the directory where the source file gets downloaded to for
        use.  If `destpath` is None, a temporary directory will be created.
        The default path is the current directory.

    Examples
    --------
    To analyze all files in the repository, do something like this
    (note: this is not self-contained code)::

        >>> repos = np.lib._datasource.Repository('/home/user/data/dir/')
        >>> for filename in filelist:
        ...     fp = repos.open(filename)
        ...     fp.analyze()
        ...     fp.close()

    Similarly you could use a URL for a repository::

        >>> repos = np.lib._datasource.Repository('http://www.xyz.edu/data')

    """

    def __init__(self, baseurl, destpath=os.curdir):
        """Create a Repository with a shared url or directory of baseurl."""
        DataSource.__init__(self, destpath=destpath)
        self._baseurl = baseurl

    def _fullpath(self, path):
        """Return complete path for path.  Prepends baseurl if necessary."""
        # A containment test rather than ``str.split``: ``split`` raises
        # ValueError for an empty separator, i.e. for an empty baseurl.
        if self._baseurl in path:
            return path    # path contains baseurl already
        return os.path.join(self._baseurl, path)

    def _findfile(self, path):
        """Extend DataSource method to prepend baseurl to ``path``."""
        return DataSource._findfile(self, self._fullpath(path))

    def abspath(self, path):
        """
        Return absolute path of file in the Repository directory.

        If `path` is an URL, then `abspath` will return either the location
        the file exists locally or the location it would exist when opened
        using the `open` method.

        Parameters
        ----------
        path : str
            Can be a local file or a remote URL. This may, but does not
            have to, include the `baseurl` with which the `Repository` was
            initialized.

        Returns
        -------
        out : str
            Complete path, including the `DataSource` destination directory.

        """
        return DataSource.abspath(self, self._fullpath(path))

    def exists(self, path):
        """
        Test if path exists prepending Repository base URL to path.

        Test if `path` exists as (and in this order):

        - a local file.
        - a remote URL that has been downloaded and stored locally in the
          `DataSource` directory.
        - a remote URL that has not been downloaded, but is valid and
          accessible.

        Parameters
        ----------
        path : str
            Can be a local file or a remote URL. This may, but does not
            have to, include the `baseurl` with which the `Repository` was
            initialized.

        Returns
        -------
        out : bool
            True if `path` exists.

        Notes
        -----
        When `path` is an URL, `exists` will return True if it's either
        stored locally in the `DataSource` directory, or is a valid remote
        URL.  `DataSource` does not discriminate between the two, the file
        is accessible if it exists in either location.

        """
        return DataSource.exists(self, self._fullpath(path))

    def open(self, path, mode='r'):
        """
        Open and return file-like object prepending Repository base URL.

        If `path` is an URL, it will be downloaded, stored in the
        DataSource directory and opened from there.

        Parameters
        ----------
        path : str
            Local file path or URL to open. This may, but does not have to,
            include the `baseurl` with which the `Repository` was
            initialized.
        mode : {'r', 'w', 'a'}, optional
            Mode to open `path`.  Mode 'r' for reading, 'w' for writing,
            'a' to append. Available modes depend on the type of object
            specified by `path`. Default is 'r'.

        Returns
        -------
        out : file object
            File object.

        """
        return DataSource.open(self, self._fullpath(path), mode)

    def listdir(self):
        """
        List files in the source Repository.

        Returns
        -------
        files : list of str
            List of file names (not containing a directory part).

        Raises
        ------
        NotImplementedError
            If the repository base is a URL.

        Notes
        -----
        Does not currently work for remote repositories.

        """
        if self._isurl(self._baseurl):
            raise NotImplementedError(
                  "Directory listing of URLs, not supported yet.")
        return os.listdir(self._baseurl)