cloudpathlib.local¶
This module implements "Local" classes that mimic their associated cloudpathlib non-local counterparts but use the local filesystem in place of cloud storage. They can be used as drop-in replacements, with the intent that you can use them as mock or monkeypatch substitutes in your tests. See "Testing code that uses cloudpathlib" for usage examples.
Attributes¶
local_azure_blob_implementation: None
¶
local_gs_implementation: None
¶
local_s3_implementation: None
¶
Classes¶
LocalAzureBlobClient
¶
Replacement for AzureBlobClient that uses the local file system. Intended as a monkeypatch substitute when writing tests.
Methods¶
__init__(self, *args, **kwargs)
special
¶
Source code in cloudpathlib/local/implementations/azure.py
def __init__(self, *args, **kwargs):
    """Refuse to build the client unless some credential source is present.

    A credential source is any of the ``blob_service_client``,
    ``connection_string`` or ``account_url`` keyword arguments, or the
    ``AZURE_STORAGE_CONNECTION_STRING`` environment variable.

    Raises:
        MissingCredentialsError: if every credential source is ``None``.
    """
    credential_kwargs = ("blob_service_client", "connection_string", "account_url")
    candidates = [kwargs.get(key) for key in credential_kwargs]
    candidates.append(os.getenv("AZURE_STORAGE_CONNECTION_STRING"))

    if not any(candidate is not None for candidate in candidates):
        raise MissingCredentialsError(
            "AzureBlobClient does not support anonymous instantiation. "
            "Credentials are required; see docs for options."
        )

    super().__init__(*args, **kwargs)
AzureBlobPath(self, cloud_path: Union[str, ~BoundedCloudPath]) -> ~BoundedCloudPath
¶
Source code in cloudpathlib/local/implementations/azure.py
def CloudPath(self, cloud_path: Union[str, BoundedCloudPath]) -> BoundedCloudPath:
    """Build an instance of this client's path class, bound to this client."""
    path_class = self._cloud_meta.path_class
    return path_class(cloud_path=cloud_path, client=self)
CloudPath(self, cloud_path: Union[str, ~BoundedCloudPath]) -> ~BoundedCloudPath
inherited
¶
Source code in cloudpathlib/local/implementations/azure.py
def CloudPath(self, cloud_path: Union[str, BoundedCloudPath]) -> BoundedCloudPath:
    """Build an instance of this client's path class, bound to this client."""
    path_class = self._cloud_meta.path_class
    return path_class(cloud_path=cloud_path, client=self)
set_as_default_client(self) -> None
inherited
¶
Set this client instance as the default one used when instantiating cloud path instances for this cloud without a client specified.
Source code in cloudpathlib/local/implementations/azure.py
def set_as_default_client(self) -> None:
    """Set this client instance as the default one used when instantiating cloud path
    instances for this cloud without a client specified."""
    # Stored on the class, so it is shared by all instances of that class.
    client_class = self.__class__
    client_class._default_client = self
LocalAzureBlobPath
¶
Replacement for AzureBlobPath that uses the local file system. Intended as a monkeypatch substitute when writing tests.
Attributes¶
anchor: str
inherited
property
readonly
¶
The concatenation of the drive and root, or ''. (Docstring copied from pathlib.Path)
blob: str
property
readonly
¶
cloud_prefix: str
¶
container: str
property
readonly
¶
drive: str
property
readonly
¶
The drive prefix (letter or UNC path), if any. (Docstring copied from pathlib.Path)
etag
property
readonly
¶
fspath: str
inherited
property
readonly
¶
md5: str
property
readonly
¶
name
inherited
property
readonly
¶
The final path component, if any. (Docstring copied from pathlib.Path)
parent
inherited
property
readonly
¶
The logical parent of the path. (Docstring copied from pathlib.Path)
parents
inherited
property
readonly
¶
A sequence of this path's logical parents. (Docstring copied from pathlib.Path)
parts
inherited
property
readonly
¶
An object providing sequence-like access to the components in the filesystem path. (Docstring copied from pathlib.Path)
stem
inherited
property
readonly
¶
The final path component, minus its last suffix. (Docstring copied from pathlib.Path)
suffix
inherited
property
readonly
¶
The final component's last suffix, if any.
This includes the leading period. For example: '.txt' (Docstring copied from pathlib.Path)
suffixes
inherited
property
readonly
¶
A list of the final component's suffixes, if any.
These include the leading periods. For example: ['.tar', '.gz'] (Docstring copied from pathlib.Path)
Methods¶
__init__(self, cloud_path: Union[str, CloudPath], client: Optional[Client] = None)
inherited
special
¶
Source code in cloudpathlib/local/implementations/azure.py
def __init__(self, cloud_path: Union[str, "CloudPath"], client: Optional["Client"] = None):
    """Validate ``cloud_path``, store its parsed forms, and resolve the client.

    When ``client`` is ``None`` the client is taken from ``cloud_path`` if that
    is itself a ``CloudPath``; otherwise the client class's default is used.

    Raises:
        ClientMismatchError: if the resolved client is not an instance of the
            client class registered for this path class.
    """
    self.is_valid_cloudpath(cloud_path, raise_on_error=True)

    # versions of the raw string that provide useful methods
    self._str = str(cloud_path)
    self._url = urlparse(self._str)
    self._path = PurePosixPath(f"/{self._no_prefix}")

    # setup client
    if client is None:
        client = (
            cloud_path.client
            if isinstance(cloud_path, CloudPath)
            else self._cloud_meta.client_class.get_default_client()
        )

    expected_client_class = self._cloud_meta.client_class
    if not isinstance(client, expected_client_class):
        raise ClientMismatchError(
            f"Client of type [{client.__class__}] is not valid for cloud path of type "
            f"[{self.__class__}]; must be instance of [{expected_client_class}], or "
            f"None to use default client for this cloud path class."
        )
    self.client: Client = client

    # track if local has been written to, if so it may need to be uploaded
    self._dirty = False

    # handle if local file gets opened
    self._handle = None
as_uri(self) -> str
inherited
¶
Return the path as a 'file' URI. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def as_uri(self) -> str:
    """Return the string form of this path."""
    uri = str(self)
    return uri
download_to(self, destination: Union[str, os.PathLike])
inherited
¶
Source code in cloudpathlib/local/implementations/azure.py
def download_to(self, destination: Union[str, os.PathLike]):
    """Download this path to ``destination`` on the local filesystem.

    If this path is a file it is downloaded through the client; when
    ``destination`` is an existing directory the file keeps its own name
    inside it. Otherwise ``destination`` is created as a directory (its parent
    must already exist) and each entry from ``iterdir()`` is downloaded into
    it under its name relative to this path.
    """
    destination = Path(destination)

    if self.is_file():
        if destination.is_dir():
            destination = destination / self.name
        self.client._download_file(self, destination)
        return

    destination.mkdir(exist_ok=True)

    # The prefix stripped from every child is the same for the whole loop,
    # so build it once instead of once per child.
    prefix = str(self)
    if not prefix.endswith("/"):
        prefix += "/"

    for child in self.iterdir():
        relative_name = str(child)[len(prefix) :]
        child.download_to(destination / relative_name)
exists(self) -> bool
inherited
¶
Whether this path exists. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def exists(self) -> bool:
    """Return the client's answer to whether this path exists."""
    found = self.client._exists(self)
    return found
glob(self, pattern: str) -> Iterable[CloudPath]
inherited
¶
Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def glob(self, pattern: str) -> Iterable["CloudPath"]:
    """Yield entries listed by the client whose key matches ``pattern``.

    A leading cloud prefix and a leading ``"<drive>/"`` are removed from
    ``pattern`` first. A pattern starting with ``"**/"`` requests a recursive
    listing and is matched without that leading component.
    """
    # strip cloud prefix from pattern if it is included
    scheme = self.cloud_prefix
    if pattern.startswith(scheme):
        pattern = pattern[len(scheme) :]

    # strip "drive" from pattern if it is included
    drive_prefix = self.drive + "/"
    if pattern.startswith(drive_prefix):
        pattern = pattern[len(drive_prefix) :]

    # identify if pattern is recursive or not
    recursive = pattern.startswith("**/")
    if recursive:
        pattern = pattern.split("/", 1)[-1]

    for candidate in self.client._list_dir(self, recursive=recursive):
        if fnmatch.fnmatch(candidate._no_prefix_no_drive, pattern):
            yield candidate
is_dir(self) -> bool
inherited
¶
Whether this path is a directory. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def is_dir(self) -> bool:
    """Return the client's answer to whether this path is a directory."""
    answer = self.client._is_dir(self)
    return answer
is_file(self) -> bool
inherited
¶
Whether this path is a regular file (also True for symlinks pointing to regular files). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def is_file(self) -> bool:
    """Return the client's answer to whether this path is a file."""
    answer = self.client._is_file(self)
    return answer
iterdir(self) -> Iterable[CloudPath]
inherited
¶
Iterate over the files in this directory. Does not yield any result for the special paths '.' and '..'. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def iterdir(self) -> Iterable["CloudPath"]:
    """Yield each entry of the client's non-recursive listing of this path."""
    yield from self.client._list_dir(self, recursive=False)
joinpath(self, *args)
inherited
¶
Combine this path with one or several arguments, and return a new path representing either a subpath (if all arguments are relative paths) or a totally different path (if one of the arguments is anchored). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def joinpath(self, *args):
    """Forward ``joinpath`` and its arguments to ``_dispatch_to_path``."""
    joined = self._dispatch_to_path("joinpath", *args)
    return joined
match(self, path_pattern)
inherited
¶
Return True if this path matches the given pattern. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def match(self, path_pattern):
    """Match this path against ``path_pattern`` via ``_dispatch_to_path``.

    A leading ``anchor + drive + "/"`` is removed from the pattern first.
    """
    # strip scheme from start of pattern before testing
    leading = self.anchor + self.drive + "/"
    if path_pattern.startswith(leading):
        path_pattern = path_pattern[len(leading) :]

    return self._dispatch_to_path("match", path_pattern)
mkdir(self, parents = False, exist_ok = False)
¶
Create a new directory at this given path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def mkdir(self, parents=False, exist_ok=False):
    """Do nothing; ``parents`` and ``exist_ok`` are accepted and ignored."""
    # not possible to make empty directory on blob storage
    return None
open(self, mode = 'r', buffering = -1, encoding = None, errors = None, newline = None, force_overwrite_from_cloud = False, force_overwrite_to_cloud = False) -> IO
inherited
¶
Open the file pointed by this path and return a file object, as the built-in open() function does. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def open(
    self,
    mode="r",
    buffering=-1,
    encoding=None,
    errors=None,
    newline=None,
    force_overwrite_from_cloud=False,  # extra kwarg not in pathlib
    force_overwrite_to_cloud=False,  # extra kwarg not in pathlib
) -> IO:
    """Open the locally cached copy of this path and return the file object.

    The cache is refreshed first. For write-capable modes (any mode containing
    ``w``, ``+``, ``x`` or ``a``) the returned buffer's ``close`` is replaced so
    that closing it also uploads the local file, and the path is marked dirty.

    Args:
        mode, buffering, encoding, errors, newline: passed to the local
            file's ``open``.
        force_overwrite_from_cloud: passed to ``_refresh_cache``.
        force_overwrite_to_cloud: passed to ``_upload_local_to_cloud`` on close.

    Raises:
        CloudPathIsADirectoryError: if the path exists but is not a file.
        CloudPathFileExistsError: if an exclusive-creation mode (any mode
            containing ``x``) is used and the path already exists.
    """
    # if trying to call open on a directory that exists
    if self.exists() and not self.is_file():
        raise CloudPathIsADirectoryError(
            f"Cannot open directory, only files. Tried to open ({self})"
        )

    # "x" may be combined with other flags ("xb", "xt", "x+"), so test for
    # membership rather than equality with the bare "x".
    if "x" in mode and self.exists():
        raise CloudPathFileExistsError(f"Cannot open existing file ({self}) for creation.")

    # TODO: consider streaming from client rather than DLing entire file to cache
    self._refresh_cache(force_overwrite_from_cloud=force_overwrite_from_cloud)

    # create any directories that may be needed if the file is new
    if not self._local.exists():
        self._local.parent.mkdir(parents=True, exist_ok=True)
        original_mtime = 0
    else:
        original_mtime = self._local.stat().st_mtime

    buffer = self._local.open(
        mode=mode,
        buffering=buffering,
        encoding=encoding,
        errors=errors,
        newline=newline,
    )

    # write modes need special handling when the buffer is closed
    if any(m in mode for m in ("w", "+", "x", "a")):
        # dirty, handle, patch close
        original_close = buffer.close

        # since we are pretending this is a cloud file, upload it to the cloud
        # when the buffer is closed
        def _patched_close(*args, **kwargs):
            original_close(*args, **kwargs)

            # original mtime should match what was in the cloud; because of system clocks or
            # rounding by the cloud provider, the new version in our cache may be "older" than
            # the original version; explicitly set the new modified time to be after the
            # original modified time.
            if self._local.stat().st_mtime < original_mtime:
                new_mtime = original_mtime + 1
                os.utime(self._local, times=(new_mtime, new_mtime))

            self._upload_local_to_cloud(force_overwrite_to_cloud=force_overwrite_to_cloud)

        buffer.close = _patched_close

        # keep reference in case we need to close when __del__ is called on this object
        self._handle = buffer

        # opened for write, so mark dirty
        self._dirty = True

    return buffer
read_bytes(self)
inherited
¶
Open the file in bytes mode, read it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def read_bytes(self):
    """Forward ``read_bytes`` to ``_dispatch_to_local_cache_path``."""
    payload = self._dispatch_to_local_cache_path("read_bytes")
    return payload
read_text(self)
inherited
¶
Open the file in text mode, read it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def read_text(self):
    """Forward ``read_text`` to ``_dispatch_to_local_cache_path``."""
    text = self._dispatch_to_local_cache_path("read_text")
    return text
rename(self, target: CloudPath) -> CloudPath
inherited
¶
Rename this path to the target path.
The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.
Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def rename(self, target: "CloudPath") -> "CloudPath":
    """Move this path to ``target`` by delegating to ``replace``."""
    # for cloud services replace == rename since we don't just rename,
    # we actually move files
    moved = self.replace(target)
    return moved
replace(self, target: CloudPath) -> CloudPath
inherited
¶
Rename this path to the target path, overwriting if that path exists.
The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.
Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def replace(self, target: "CloudPath") -> "CloudPath":
    """Move this path onto ``target`` and return ``target``.

    An existing ``target`` is unlinked before the client moves the file.

    Raises:
        TypeError: if ``target`` is not of exactly the same type as this path.
    """
    own_type = type(self)
    if own_type is not type(target):
        raise TypeError(
            f"The target based to rename must be an instantiated class of type: {own_type}"
        )

    if target.exists():
        target.unlink()

    self.client._move_file(self, target)
    return target
rglob(self, pattern: str) -> Iterable[CloudPath]
inherited
¶
Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def rglob(self, pattern: str) -> Iterable["CloudPath"]:
    """Call ``glob`` with ``pattern`` prefixed by ``"**/"``."""
    recursive_pattern = "**/" + pattern
    return self.glob(recursive_pattern)
rmdir(self)
inherited
¶
Remove this directory. The directory must be empty. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def rmdir(self):
    """Remove this path through the client if it is not a file and has no entries.

    Raises:
        CloudPathNotADirectoryError: if this path is a file.
        DirectoryNotEmptyError: if ``iterdir()`` yields at least one entry.
    """
    if self.is_file():
        raise CloudPathNotADirectoryError(
            f"Path {self} is a file; call unlink instead of rmdir."
        )

    # Pull a single entry: StopIteration means there is nothing inside.
    try:
        next(self.iterdir())
    except StopIteration:
        pass
    else:
        raise DirectoryNotEmptyError(
            f"Directory not empty: '{self}'. Use rmtree to delete recursively."
        )

    self.client._remove(self)
rmtree(self)
inherited
¶
Delete an entire directory tree.
Source code in cloudpathlib/local/implementations/azure.py
def rmtree(self):
    """Delete an entire directory tree.

    Raises:
        CloudPathNotADirectoryError: if this path is a file.
    """
    if not self.is_file():
        self.client._remove(self)
        return

    raise CloudPathNotADirectoryError(
        f"Path {self} is a file; call unlink instead of rmtree."
    )
samefile(self, other_path: CloudPath) -> bool
inherited
¶
Return whether other_path is the same or not as this file (as returned by os.path.samefile()). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def samefile(self, other_path: "CloudPath") -> bool:
    """Return whether this path compares equal to ``other_path``."""
    # all cloud paths are absolute and the paths are used for hash
    is_same = self == other_path
    return is_same
stat(self)
inherited
¶
Return the result of the stat() system call on this path, like os.stat() does. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def stat(self):
    """Return the metadata the client reports for this path.

    Raises:
        NoStatError: if the client raises ``FileNotFoundError``; the original
            error is attached as ``__cause__``.
    """
    try:
        return self.client._stat(self)
    except FileNotFoundError as err:
        # Chain explicitly so the traceback shows the lookup failure as the
        # cause rather than as an unrelated error during handling.
        raise NoStatError(
            f"No stats available for {self}; it may be a directory or not exist."
        ) from err
touch(self)
inherited
¶
Create this file with the given access mode, if it doesn't exist. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def touch(self):
    """Ask the client to touch this path."""
    client = self.client
    client._touch(self)
unlink(self)
inherited
¶
Remove this file or link. If the path is a directory, use rmdir() instead. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def unlink(self):
    """Remove this path through the client unless it is a directory.

    Raises:
        CloudPathIsADirectoryError: if this path is a directory.
    """
    if not self.is_dir():
        self.client._remove(self)
        return

    raise CloudPathIsADirectoryError(
        f"Path {self} is a directory; call rmdir instead of unlink."
    )
with_name(self, name)
inherited
¶
Return a new path with the file name changed. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def with_name(self, name):
    """Forward ``with_name`` and ``name`` to ``_dispatch_to_path``."""
    renamed = self._dispatch_to_path("with_name", name)
    return renamed
with_suffix(self, suffix)
inherited
¶
Return a new path with the file suffix changed. If the path has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def with_suffix(self, suffix):
    """Forward ``with_suffix`` and ``suffix`` to ``_dispatch_to_path``."""
    changed = self._dispatch_to_path("with_suffix", suffix)
    return changed
write_bytes(self, data: bytes)
inherited
¶
Open the file in bytes mode, write to it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def write_bytes(self, data: bytes):
    """Open the file in bytes mode, write to it, and close the file.

    Returns whatever the opened file's ``write`` returns.

    NOTE: vendored from pathlib since we override open
    https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1235-L1242
    """
    # type-check for the buffer interface before truncating the file
    payload = memoryview(data)

    with self.open(mode="wb") as stream:
        written = stream.write(payload)
    return written
write_text(self, data: str, encoding = None, errors = None)
inherited
¶
Open the file in text mode, write to it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/azure.py
def write_text(self, data: str, encoding=None, errors=None):
    """Open the file in text mode, write to it, and close the file.

    Returns whatever the opened file's ``write`` returns.

    NOTE: vendored from pathlib since we override open
    https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1244-L1252

    Raises:
        TypeError: if ``data`` is not a ``str``.
    """
    if isinstance(data, str):
        with self.open(mode="w", encoding=encoding, errors=errors) as stream:
            written = stream.write(data)
        return written

    raise TypeError("data must be str, not %s" % data.__class__.__name__)
LocalClient
¶
Abstract client for accessing objects on the local filesystem. Subclasses serve as monkeypatch substitutes for normal Client subclasses when writing tests.
Methods¶
__init__(self, *args, *, local_cache_dir: Union[str, os.PathLike] = None, local_storage_dir: Union[str, os.PathLike] = None, **kwargs)
special
¶
Source code in cloudpathlib/local/localclient.py
def __init__(
    self,
    *args,
    local_cache_dir: Optional[Union[str, os.PathLike]] = None,
    local_storage_dir: Optional[Union[str, os.PathLike]] = None,
    **kwargs,
):
    """Record the local storage directory and initialise the parent class.

    When ``local_storage_dir`` is ``None`` the class's default storage
    directory is used. Only ``local_cache_dir`` is passed on to the parent
    initialiser; ``*args`` and ``**kwargs`` are accepted but not forwarded.
    """
    # setup caching and local versions of file. use default temp dir if not provided
    storage_root = (
        self.get_default_storage_dir() if local_storage_dir is None else local_storage_dir
    )
    self._local_storage_dir = Path(storage_root)

    super().__init__(local_cache_dir=local_cache_dir)
CloudPath(self, cloud_path: Union[str, ~BoundedCloudPath]) -> ~BoundedCloudPath
inherited
¶
Source code in cloudpathlib/local/localclient.py
def CloudPath(self, cloud_path: Union[str, BoundedCloudPath]) -> BoundedCloudPath:
    """Build an instance of this client's path class, bound to this client."""
    path_class = self._cloud_meta.path_class
    return path_class(cloud_path=cloud_path, client=self)
get_default_storage_dir() -> Path
classmethod
¶
Source code in cloudpathlib/local/localclient.py
@classmethod
def get_default_storage_dir(cls) -> Path:
    """Return the path of the class-level default storage temp directory.

    The ``TemporaryDirectory`` is created on first use, stored on the class,
    and appended to ``_temp_dirs_to_clean``.
    """
    temp_dir = cls._default_storage_temp_dir
    if temp_dir is None:
        temp_dir = TemporaryDirectory()
        cls._default_storage_temp_dir = temp_dir
        _temp_dirs_to_clean.append(temp_dir)
    return Path(temp_dir.name)
reset_default_storage_dir() -> Path
classmethod
¶
Source code in cloudpathlib/local/localclient.py
@classmethod
def reset_default_storage_dir(cls) -> Path:
    """Clear the stored default storage directory and return a new one.

    The class attribute is set to ``None`` and ``get_default_storage_dir()``
    is then called; its result is returned.
    """
    cls._default_storage_temp_dir = None
    fresh_dir = cls.get_default_storage_dir()
    return fresh_dir
set_as_default_client(self) -> None
inherited
¶
Set this client instance as the default one used when instantiating cloud path instances for this cloud without a client specified.
Source code in cloudpathlib/local/localclient.py
def set_as_default_client(self) -> None:
    """Set this client instance as the default one used when instantiating cloud path
    instances for this cloud without a client specified."""
    # Stored on the class, so it is shared by all instances of that class.
    client_class = self.__class__
    client_class._default_client = self
LocalGSClient
¶
Replacement for GSClient that uses the local file system. Intended as a monkeypatch substitute when writing tests.
Methods¶
__init__(self, *args, *, local_cache_dir: Union[str, os.PathLike] = None, local_storage_dir: Union[str, os.PathLike] = None, **kwargs)
inherited
special
¶
Source code in cloudpathlib/local/implementations/gs.py
def __init__(
    self,
    *args,
    local_cache_dir: Optional[Union[str, os.PathLike]] = None,
    local_storage_dir: Optional[Union[str, os.PathLike]] = None,
    **kwargs,
):
    """Record the local storage directory and initialise the parent class.

    When ``local_storage_dir`` is ``None`` the class's default storage
    directory is used. Only ``local_cache_dir`` is passed on to the parent
    initialiser; ``*args`` and ``**kwargs`` are accepted but not forwarded.
    """
    # setup caching and local versions of file. use default temp dir if not provided
    storage_root = (
        self.get_default_storage_dir() if local_storage_dir is None else local_storage_dir
    )
    self._local_storage_dir = Path(storage_root)

    super().__init__(local_cache_dir=local_cache_dir)
CloudPath(self, cloud_path: Union[str, ~BoundedCloudPath]) -> ~BoundedCloudPath
inherited
¶
Source code in cloudpathlib/local/implementations/gs.py
def CloudPath(self, cloud_path: Union[str, BoundedCloudPath]) -> BoundedCloudPath:
    """Build an instance of this client's path class, bound to this client."""
    path_class = self._cloud_meta.path_class
    return path_class(cloud_path=cloud_path, client=self)
GSPath(self, cloud_path: Union[str, ~BoundedCloudPath]) -> ~BoundedCloudPath
¶
Source code in cloudpathlib/local/implementations/gs.py
def CloudPath(self, cloud_path: Union[str, BoundedCloudPath]) -> BoundedCloudPath:
    """Build an instance of this client's path class, bound to this client."""
    path_class = self._cloud_meta.path_class
    return path_class(cloud_path=cloud_path, client=self)
set_as_default_client(self) -> None
inherited
¶
Set this client instance as the default one used when instantiating cloud path instances for this cloud without a client specified.
Source code in cloudpathlib/local/implementations/gs.py
def set_as_default_client(self) -> None:
    """Set this client instance as the default one used when instantiating cloud path
    instances for this cloud without a client specified."""
    # Stored on the class, so it is shared by all instances of that class.
    client_class = self.__class__
    client_class._default_client = self
LocalGSPath
¶
Replacement for GSPath that uses the local file system. Intended as a monkeypatch substitute when writing tests.
Attributes¶
anchor: str
inherited
property
readonly
¶
The concatenation of the drive and root, or ''. (Docstring copied from pathlib.Path)
blob: str
property
readonly
¶
bucket: str
property
readonly
¶
cloud_prefix: str
¶
drive: str
property
readonly
¶
The drive prefix (letter or UNC path), if any. (Docstring copied from pathlib.Path)
etag
property
readonly
¶
fspath: str
inherited
property
readonly
¶
name
inherited
property
readonly
¶
The final path component, if any. (Docstring copied from pathlib.Path)
parent
inherited
property
readonly
¶
The logical parent of the path. (Docstring copied from pathlib.Path)
parents
inherited
property
readonly
¶
A sequence of this path's logical parents. (Docstring copied from pathlib.Path)
parts
inherited
property
readonly
¶
An object providing sequence-like access to the components in the filesystem path. (Docstring copied from pathlib.Path)
stem
inherited
property
readonly
¶
The final path component, minus its last suffix. (Docstring copied from pathlib.Path)
suffix
inherited
property
readonly
¶
The final component's last suffix, if any.
This includes the leading period. For example: '.txt' (Docstring copied from pathlib.Path)
suffixes
inherited
property
readonly
¶
A list of the final component's suffixes, if any.
These include the leading periods. For example: ['.tar', '.gz'] (Docstring copied from pathlib.Path)
Methods¶
__init__(self, cloud_path: Union[str, CloudPath], client: Optional[Client] = None)
inherited
special
¶
Source code in cloudpathlib/local/implementations/gs.py
def __init__(self, cloud_path: Union[str, "CloudPath"], client: Optional["Client"] = None):
    """Validate ``cloud_path``, store its parsed forms, and resolve the client.

    When ``client`` is ``None`` the client is taken from ``cloud_path`` if that
    is itself a ``CloudPath``; otherwise the client class's default is used.

    Raises:
        ClientMismatchError: if the resolved client is not an instance of the
            client class registered for this path class.
    """
    self.is_valid_cloudpath(cloud_path, raise_on_error=True)

    # versions of the raw string that provide useful methods
    self._str = str(cloud_path)
    self._url = urlparse(self._str)
    self._path = PurePosixPath(f"/{self._no_prefix}")

    # setup client
    if client is None:
        client = (
            cloud_path.client
            if isinstance(cloud_path, CloudPath)
            else self._cloud_meta.client_class.get_default_client()
        )

    expected_client_class = self._cloud_meta.client_class
    if not isinstance(client, expected_client_class):
        raise ClientMismatchError(
            f"Client of type [{client.__class__}] is not valid for cloud path of type "
            f"[{self.__class__}]; must be instance of [{expected_client_class}], or "
            f"None to use default client for this cloud path class."
        )
    self.client: Client = client

    # track if local has been written to, if so it may need to be uploaded
    self._dirty = False

    # handle if local file gets opened
    self._handle = None
as_uri(self) -> str
inherited
¶
Return the path as a 'file' URI. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def as_uri(self) -> str:
    """Return the string form of this path."""
    uri = str(self)
    return uri
download_to(self, destination: Union[str, os.PathLike])
inherited
¶
Source code in cloudpathlib/local/implementations/gs.py
def download_to(self, destination: Union[str, os.PathLike]):
    """Download this path to ``destination`` on the local filesystem.

    If this path is a file it is downloaded through the client; when
    ``destination`` is an existing directory the file keeps its own name
    inside it. Otherwise ``destination`` is created as a directory (its parent
    must already exist) and each entry from ``iterdir()`` is downloaded into
    it under its name relative to this path.
    """
    destination = Path(destination)

    if self.is_file():
        if destination.is_dir():
            destination = destination / self.name
        self.client._download_file(self, destination)
        return

    destination.mkdir(exist_ok=True)

    # The prefix stripped from every child is the same for the whole loop,
    # so build it once instead of once per child.
    prefix = str(self)
    if not prefix.endswith("/"):
        prefix += "/"

    for child in self.iterdir():
        relative_name = str(child)[len(prefix) :]
        child.download_to(destination / relative_name)
exists(self) -> bool
inherited
¶
Whether this path exists. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def exists(self) -> bool:
    """Return the client's answer to whether this path exists."""
    found = self.client._exists(self)
    return found
glob(self, pattern: str) -> Iterable[CloudPath]
inherited
¶
Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def glob(self, pattern: str) -> Iterable["CloudPath"]:
    """Yield entries listed by the client whose key matches ``pattern``.

    A leading cloud prefix and a leading ``"<drive>/"`` are removed from
    ``pattern`` first. A pattern starting with ``"**/"`` requests a recursive
    listing and is matched without that leading component.
    """
    # strip cloud prefix from pattern if it is included
    scheme = self.cloud_prefix
    if pattern.startswith(scheme):
        pattern = pattern[len(scheme) :]

    # strip "drive" from pattern if it is included
    drive_prefix = self.drive + "/"
    if pattern.startswith(drive_prefix):
        pattern = pattern[len(drive_prefix) :]

    # identify if pattern is recursive or not
    recursive = pattern.startswith("**/")
    if recursive:
        pattern = pattern.split("/", 1)[-1]

    for candidate in self.client._list_dir(self, recursive=recursive):
        if fnmatch.fnmatch(candidate._no_prefix_no_drive, pattern):
            yield candidate
is_dir(self) -> bool
inherited
¶
Whether this path is a directory. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def is_dir(self) -> bool:
    """Return the client's answer to whether this path is a directory."""
    answer = self.client._is_dir(self)
    return answer
is_file(self) -> bool
inherited
¶
Whether this path is a regular file (also True for symlinks pointing to regular files). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def is_file(self) -> bool:
    """Return the client's answer to whether this path is a file."""
    answer = self.client._is_file(self)
    return answer
iterdir(self) -> Iterable[CloudPath]
inherited
¶
Iterate over the files in this directory. Does not yield any result for the special paths '.' and '..'. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def iterdir(self) -> Iterable["CloudPath"]:
    """Yield each entry of the client's non-recursive listing of this path."""
    yield from self.client._list_dir(self, recursive=False)
joinpath(self, *args)
inherited
¶
Combine this path with one or several arguments, and return a new path representing either a subpath (if all arguments are relative paths) or a totally different path (if one of the arguments is anchored). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def joinpath(self, *args):
    """Forward ``joinpath`` and its arguments to ``_dispatch_to_path``."""
    joined = self._dispatch_to_path("joinpath", *args)
    return joined
match(self, path_pattern)
inherited
¶
Return True if this path matches the given pattern. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def match(self, path_pattern):
    """Match this path against ``path_pattern`` via ``_dispatch_to_path``.

    A leading ``anchor + drive + "/"`` is removed from the pattern first.
    """
    # strip scheme from start of pattern before testing
    leading = self.anchor + self.drive + "/"
    if path_pattern.startswith(leading):
        path_pattern = path_pattern[len(leading) :]

    return self._dispatch_to_path("match", path_pattern)
mkdir(self, parents = False, exist_ok = False)
¶
Create a new directory at this given path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def mkdir(self, parents=False, exist_ok=False):
    """Do nothing; ``parents`` and ``exist_ok`` are accepted and ignored."""
    # not possible to make empty directory on gs
    return None
open(self, mode = 'r', buffering = -1, encoding = None, errors = None, newline = None, force_overwrite_from_cloud = False, force_overwrite_to_cloud = False) -> IO
inherited
¶
Open the file pointed by this path and return a file object, as the built-in open() function does. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def open(
    self,
    mode="r",
    buffering=-1,
    encoding=None,
    errors=None,
    newline=None,
    force_overwrite_from_cloud=False,  # extra kwarg not in pathlib
    force_overwrite_to_cloud=False,  # extra kwarg not in pathlib
) -> IO:
    """Open the locally cached copy of this path and return the file object.

    The cache is refreshed first. For write-capable modes (any mode containing
    ``w``, ``+``, ``x`` or ``a``) the returned buffer's ``close`` is replaced so
    that closing it also uploads the local file, and the path is marked dirty.

    Args:
        mode, buffering, encoding, errors, newline: passed to the local
            file's ``open``.
        force_overwrite_from_cloud: passed to ``_refresh_cache``.
        force_overwrite_to_cloud: passed to ``_upload_local_to_cloud`` on close.

    Raises:
        CloudPathIsADirectoryError: if the path exists but is not a file.
        CloudPathFileExistsError: if an exclusive-creation mode (any mode
            containing ``x``) is used and the path already exists.
    """
    # if trying to call open on a directory that exists
    if self.exists() and not self.is_file():
        raise CloudPathIsADirectoryError(
            f"Cannot open directory, only files. Tried to open ({self})"
        )

    # "x" may be combined with other flags ("xb", "xt", "x+"), so test for
    # membership rather than equality with the bare "x".
    if "x" in mode and self.exists():
        raise CloudPathFileExistsError(f"Cannot open existing file ({self}) for creation.")

    # TODO: consider streaming from client rather than DLing entire file to cache
    self._refresh_cache(force_overwrite_from_cloud=force_overwrite_from_cloud)

    # create any directories that may be needed if the file is new
    if not self._local.exists():
        self._local.parent.mkdir(parents=True, exist_ok=True)
        original_mtime = 0
    else:
        original_mtime = self._local.stat().st_mtime

    buffer = self._local.open(
        mode=mode,
        buffering=buffering,
        encoding=encoding,
        errors=errors,
        newline=newline,
    )

    # write modes need special handling when the buffer is closed
    if any(m in mode for m in ("w", "+", "x", "a")):
        # dirty, handle, patch close
        original_close = buffer.close

        # since we are pretending this is a cloud file, upload it to the cloud
        # when the buffer is closed
        def _patched_close(*args, **kwargs):
            original_close(*args, **kwargs)

            # original mtime should match what was in the cloud; because of system clocks or
            # rounding by the cloud provider, the new version in our cache may be "older" than
            # the original version; explicitly set the new modified time to be after the
            # original modified time.
            if self._local.stat().st_mtime < original_mtime:
                new_mtime = original_mtime + 1
                os.utime(self._local, times=(new_mtime, new_mtime))

            self._upload_local_to_cloud(force_overwrite_to_cloud=force_overwrite_to_cloud)

        buffer.close = _patched_close

        # keep reference in case we need to close when __del__ is called on this object
        self._handle = buffer

        # opened for write, so mark dirty
        self._dirty = True

    return buffer
read_bytes(self)
inherited
¶
Open the file in bytes mode, read it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def read_bytes(self):
    """Forward ``read_bytes`` to ``_dispatch_to_local_cache_path``."""
    payload = self._dispatch_to_local_cache_path("read_bytes")
    return payload
read_text(self)
inherited
¶
Open the file in text mode, read it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def read_text(self):
    """Forward ``read_text`` to ``_dispatch_to_local_cache_path``."""
    text = self._dispatch_to_local_cache_path("read_text")
    return text
rename(self, target: CloudPath) -> CloudPath
inherited
¶
Rename this path to the target path.
The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.
Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def rename(self, target: "CloudPath") -> "CloudPath":
    """Move this path to ``target`` by delegating to ``replace``."""
    # for cloud services replace == rename since we don't just rename,
    # we actually move files
    moved = self.replace(target)
    return moved
replace(self, target: CloudPath) -> CloudPath
inherited
¶
Rename this path to the target path, overwriting if that path exists.
The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.
Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def replace(self, target: "CloudPath") -> "CloudPath":
    """Move this path onto ``target`` and return ``target``.

    An existing ``target`` is unlinked before the client moves the file.

    Raises:
        TypeError: if ``target`` is not of exactly the same type as this path.
    """
    own_type = type(self)
    if own_type is not type(target):
        raise TypeError(
            f"The target based to rename must be an instantiated class of type: {own_type}"
        )

    if target.exists():
        target.unlink()

    self.client._move_file(self, target)
    return target
rglob(self, pattern: str) -> Iterable[CloudPath]
inherited
¶
Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def rglob(self, pattern: str) -> Iterable["CloudPath"]:
    """Glob recursively by prefixing ``pattern`` with ``**/`` and calling ``glob``."""
    recursive_pattern = "**/" + pattern
    return self.glob(recursive_pattern)
rmdir(self)
inherited
¶
Remove this directory. The directory must be empty. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def rmdir(self):
    """Remove this directory through the client; it must not be a file and must be empty.

    Raises:
        CloudPathNotADirectoryError: if this path is a file.
        DirectoryNotEmptyError: if ``iterdir()`` yields at least one entry.
    """
    if self.is_file():
        raise CloudPathNotADirectoryError(
            f"Path {self} is a file; call unlink instead of rmdir."
        )

    try:
        # pulling a single entry is enough to know the directory is not empty
        next(self.iterdir())
    except StopIteration:
        pass
    else:
        raise DirectoryNotEmptyError(
            f"Directory not empty: '{self}'. Use rmtree to delete recursively."
        )

    self.client._remove(self)
rmtree(self)
inherited
¶
Delete an entire directory tree.
Source code in cloudpathlib/local/implementations/gs.py
def rmtree(self):
    """Delete an entire directory tree.

    Raises:
        CloudPathNotADirectoryError: if this path is a file.
    """
    if not self.is_file():
        self.client._remove(self)
        return
    raise CloudPathNotADirectoryError(
        f"Path {self} is a file; call unlink instead of rmtree."
    )
samefile(self, other_path: CloudPath) -> bool
inherited
¶
Return whether other_path is the same or not as this file (as returned by os.path.samefile()). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def samefile(self, other_path: "CloudPath") -> bool:
    """Return whether ``other_path`` compares equal to this path."""
    # all cloud paths are absolute and the paths are used for hash
    is_same = self == other_path
    return is_same
stat(self)
inherited
¶
Return the result of the stat() system call on this path, like os.stat() does. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def stat(self):
    """Return whatever the client's ``_stat`` reports for this path.

    Raises:
        NoStatError: if the client raises ``FileNotFoundError``.
    """
    try:
        return self.client._stat(self)
    except FileNotFoundError:
        raise NoStatError(
            f"No stats available for {self}; it may be a directory or not exist."
        )
touch(self)
inherited
¶
Create this file with the given access mode, if it doesn't exist. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def touch(self):
    """Forward to the client's ``_touch`` for this path."""
    backend = self.client
    backend._touch(self)
unlink(self)
inherited
¶
Remove this file or link. If the path is a directory, use rmdir() instead. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def unlink(self):
    """Remove the object at this path through the client.

    Raises:
        CloudPathIsADirectoryError: if this path is a directory.
    """
    if not self.is_dir():
        self.client._remove(self)
        return
    raise CloudPathIsADirectoryError(
        f"Path {self} is a directory; call rmdir instead of unlink."
    )
with_name(self, name)
inherited
¶
Return a new path with the file name changed. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def with_name(self, name):
    """Return the result of forwarding ``with_name(name)`` to ``_dispatch_to_path``."""
    operation = "with_name"
    return self._dispatch_to_path(operation, name)
with_suffix(self, suffix)
inherited
¶
Return a new path with the file suffix changed. If the path has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def with_suffix(self, suffix):
    """Return the result of forwarding ``with_suffix(suffix)`` to ``_dispatch_to_path``."""
    operation = "with_suffix"
    return self._dispatch_to_path(operation, suffix)
write_bytes(self, data: bytes)
inherited
¶
Open the file in bytes mode, write to it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def write_bytes(self, data: bytes):
    """Write ``data`` in binary mode and return what the file's ``write`` returns.

    NOTE: vendored from pathlib since we override open
    https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1235-L1242
    """
    # building the memoryview first rejects non-buffer input before the file is opened
    payload = memoryview(data)
    with self.open(mode="wb") as sink:
        written = sink.write(payload)
    return written
write_text(self, data: str, encoding = None, errors = None)
inherited
¶
Open the file in text mode, write to it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/gs.py
def write_text(self, data: str, encoding=None, errors=None):
    """Write ``data`` in text mode and return what the file's ``write`` returns.

    NOTE: vendored from pathlib since we override open
    https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1244-L1252

    Raises:
        TypeError: if ``data`` is not a ``str``.
    """
    if isinstance(data, str):
        with self.open(mode="w", encoding=encoding, errors=errors) as sink:
            return sink.write(data)
    raise TypeError("data must be str, not %s" % data.__class__.__name__)
LocalPath
¶
Abstract CloudPath for accessing objects on the local filesystem. Subclasses are intended as monkeypatch substitutes for normal CloudPath subclasses when writing tests.
Attributes¶
anchor: str
inherited
property
readonly
¶
The concatenation of the drive and root, or ''. (Docstring copied from pathlib.Path)
drive: str
inherited
property
readonly
¶
The drive prefix (letter or UNC path), if any. (Docstring copied from pathlib.Path)
fspath: str
inherited
property
readonly
¶
name
inherited
property
readonly
¶
The final path component, if any. (Docstring copied from pathlib.Path)
parent
inherited
property
readonly
¶
The logical parent of the path. (Docstring copied from pathlib.Path)
parents
inherited
property
readonly
¶
A sequence of this path's logical parents. (Docstring copied from pathlib.Path)
parts
inherited
property
readonly
¶
An object providing sequence-like access to the components in the filesystem path. (Docstring copied from pathlib.Path)
stem
inherited
property
readonly
¶
The final path component, minus its last suffix. (Docstring copied from pathlib.Path)
suffix
inherited
property
readonly
¶
The final component's last suffix, if any.
This includes the leading period. For example: '.txt' (Docstring copied from pathlib.Path)
suffixes
inherited
property
readonly
¶
A list of the final component's suffixes, if any.
These include the leading periods. For example: ['.tar', '.gz'] (Docstring copied from pathlib.Path)
Methods¶
__init__(self, cloud_path: Union[str, CloudPath], client: Optional[Client] = None)
inherited
special
¶
Source code in cloudpathlib/local/localpath.py
def __init__(self, cloud_path: Union[str, "CloudPath"], client: Optional["Client"] = None):
    """Validate and parse ``cloud_path`` and bind the client this path will use.

    When ``client`` is None, the client of a ``CloudPath`` input is reused;
    otherwise the client class's default client is fetched.

    Raises:
        ClientMismatchError: if the resolved client is not an instance of
            ``self._cloud_meta.client_class``.
    """
    self.is_valid_cloudpath(cloud_path, raise_on_error=True)

    # versions of the raw string that provide useful methods
    self._str = str(cloud_path)
    self._url = urlparse(self._str)
    self._path = PurePosixPath(f"/{self._no_prefix}")

    # setup client
    if client is None:
        client = (
            cloud_path.client
            if isinstance(cloud_path, CloudPath)
            else self._cloud_meta.client_class.get_default_client()
        )

    if not isinstance(client, self._cloud_meta.client_class):
        raise ClientMismatchError(
            f"Client of type [{client.__class__}] is not valid for cloud path of type "
            f"[{self.__class__}]; must be instance of [{self._cloud_meta.client_class}], or "
            f"None to use default client for this cloud path class."
        )
    self.client: Client = client

    # track if local has been written to, if so it may need to be uploaded
    self._dirty = False

    # handle if local file gets opened
    self._handle = None
as_uri(self) -> str
inherited
¶
Return the path as a 'file' URI. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def as_uri(self) -> str:
    """Return the string form of this path."""
    uri = str(self)
    return uri
download_to(self, destination: Union[str, os.PathLike])
inherited
¶
Source code in cloudpathlib/local/localpath.py
def download_to(self, destination: Union[str, os.PathLike]):
    """Download this file, or each entry under this directory, to ``destination``.

    For a file, an existing directory ``destination`` receives the file under
    this path's ``name``. Otherwise ``destination`` is created if missing and
    ``download_to`` is called on every entry from ``iterdir()``, using the part
    of its string form that follows this path as the relative target.
    """
    destination = Path(destination)

    if not self.is_file():
        destination.mkdir(exist_ok=True)
        # children are addressed relative to this path, which must end in "/"
        prefix = str(self)
        if not prefix.endswith("/"):
            prefix = prefix + "/"
        for child in self.iterdir():
            relative_name = str(child)[len(prefix) :]
            child.download_to(destination / relative_name)
        return

    if destination.is_dir():
        destination = destination / self.name
    self.client._download_file(self, destination)
exists(self) -> bool
inherited
¶
Whether this path exists. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def exists(self) -> bool:
    """Return the client's ``_exists`` answer for this path."""
    backend = self.client
    return backend._exists(self)
glob(self, pattern: str) -> Iterable[CloudPath]
inherited
¶
Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def glob(self, pattern: str) -> Iterable["CloudPath"]:
    """Yield entries listed by the client whose ``_no_prefix_no_drive`` matches ``pattern``.

    A leading cloud prefix and a leading ``<drive>/`` are removed from
    ``pattern`` first. A pattern starting with ``**/`` makes the client listing
    recursive and is matched without that marker.
    """
    # strip cloud prefix from pattern if it is included
    scheme = self.cloud_prefix
    if pattern.startswith(scheme):
        pattern = pattern[len(scheme) :]

    # strip "drive" from pattern if it is included
    drive_root = self.drive + "/"
    if pattern.startswith(drive_root):
        pattern = pattern[len(drive_root) :]

    # identify if pattern is recursive or not
    recursive = pattern.startswith("**/")
    if recursive:
        pattern = pattern.split("/", 1)[-1]

    candidates = self.client._list_dir(self, recursive=recursive)
    yield from (
        entry for entry in candidates if fnmatch.fnmatch(entry._no_prefix_no_drive, pattern)
    )
is_dir(self) -> bool
¶
Whether this path is a directory. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def is_dir(self) -> bool:
    """Return the client's ``_is_dir`` answer for this path."""
    backend = self.client
    return backend._is_dir(self)
is_file(self) -> bool
¶
Whether this path is a regular file (also True for symlinks pointing to regular files). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def is_file(self) -> bool:
    """Return the client's ``_is_file`` answer for this path."""
    backend = self.client
    return backend._is_file(self)
iterdir(self) -> Iterable[CloudPath]
inherited
¶
Iterate over the files in this directory. Does not yield any result for the special paths '.' and '..'. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def iterdir(self) -> Iterable["CloudPath"]:
    """Yield each entry the client lists for this path, non-recursively."""
    yield from self.client._list_dir(self, recursive=False)
joinpath(self, *args)
inherited
¶
Combine this path with one or several arguments, and return a new path representing either a subpath (if all arguments are relative paths) or a totally different path (if one of the arguments is anchored). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def joinpath(self, *args):
    """Return the result of forwarding ``joinpath(*args)`` to ``_dispatch_to_path``."""
    operation = "joinpath"
    return self._dispatch_to_path(operation, *args)
match(self, path_pattern)
inherited
¶
Return True if this path matches the given pattern. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def match(self, path_pattern):
    """Forward ``match`` to ``_dispatch_to_path`` after removing a leading scheme and drive.

    The prefix removed is ``anchor + drive + "/"``, and only when
    ``path_pattern`` starts with it.
    """
    # strip scheme from start of pattern before testing
    scheme_prefix = self.anchor + self.drive + "/"
    if path_pattern.startswith(scheme_prefix):
        path_pattern = path_pattern[len(scheme_prefix) :]
    return self._dispatch_to_path("match", path_pattern)
mkdir(self, parents: bool = False, exist_ok: bool = False)
inherited
¶
Create a new directory at this given path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
@abc.abstractmethod
def mkdir(self, parents: bool = False, exist_ok: bool = False):
    """Create a directory at this path (abstract).

    Should be implemented using the client API without requiring that a
    directory is downloaded.
    """
    return None
open(self, mode = 'r', buffering = -1, encoding = None, errors = None, newline = None, force_overwrite_from_cloud = False, force_overwrite_to_cloud = False) -> IO
inherited
¶
Open the file pointed by this path and return a file object, as the built-in open() function does. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def open(
    self,
    mode="r",
    buffering=-1,
    encoding=None,
    errors=None,
    newline=None,
    force_overwrite_from_cloud=False,  # extra kwarg not in pathlib
    force_overwrite_to_cloud=False,  # extra kwarg not in pathlib
) -> IO:
    """Open the local copy of this file; for write modes, upload when it is closed.

    ``_refresh_cache`` is called first with ``force_overwrite_from_cloud``. For
    modes containing ``w``, ``+``, ``x`` or ``a`` the returned handle's ``close``
    is wrapped so that closing it calls ``_upload_local_to_cloud`` with
    ``force_overwrite_to_cloud``; the handle is also stored on ``_handle`` and
    ``_dirty`` is set.

    Raises:
        CloudPathIsADirectoryError: if the path exists but is not a file.
        CloudPathFileExistsError: if ``mode`` is ``"x"`` and the path exists.
    """
    # if trying to call open on a directory that exists
    if self.exists() and not self.is_file():
        raise CloudPathIsADirectoryError(
            f"Cannot open directory, only files. Tried to open ({self})"
        )

    if mode == "x" and self.exists():
        raise CloudPathFileExistsError(f"Cannot open existing file ({self}) for creation.")

    # TODO: consider streaming from client rather than DLing entire file to cache
    self._refresh_cache(force_overwrite_from_cloud=force_overwrite_from_cloud)

    if self._local.exists():
        cached_mtime = self._local.stat().st_mtime
    else:
        # create any directories that may be needed if the file is new
        self._local.parent.mkdir(parents=True, exist_ok=True)
        cached_mtime = 0

    handle = self._local.open(
        mode=mode,
        buffering=buffering,
        encoding=encoding,
        errors=errors,
        newline=newline,
    )

    # only write modes need special handling when the handle is closed
    opened_for_write = any(flag in mode for flag in "w+xa")
    if not opened_for_write:
        return handle

    close_local_file = handle.close

    # since we are pretending this is a cloud file, upload it to the cloud
    # when the handle is closed
    def _patched_close(*args, **kwargs):
        close_local_file(*args, **kwargs)

        # original mtime should match what was in the cloud; because of system clocks or rounding
        # by the cloud provider, the new version in our cache is "older" than the original version;
        # explicitly set the new modified time to be after the original modified time.
        if self._local.stat().st_mtime < cached_mtime:
            bumped_mtime = cached_mtime + 1
            os.utime(self._local, times=(bumped_mtime, bumped_mtime))

        self._upload_local_to_cloud(force_overwrite_to_cloud=force_overwrite_to_cloud)

    handle.close = _patched_close

    # keep reference in case we need to close when __del__ is called on this object
    self._handle = handle

    # opened for write, so mark dirty
    self._dirty = True

    return handle
read_bytes(self)
inherited
¶
Open the file in bytes mode, read it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def read_bytes(self):
    """Return the file's bytes by forwarding ``read_bytes`` to ``_dispatch_to_local_cache_path``."""
    method_name = "read_bytes"
    return self._dispatch_to_local_cache_path(method_name)
read_text(self)
inherited
¶
Open the file in text mode, read it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def read_text(self):
    """Return the file's text by forwarding ``read_text`` to ``_dispatch_to_local_cache_path``."""
    method_name = "read_text"
    return self._dispatch_to_local_cache_path(method_name)
rename(self, target: CloudPath) -> CloudPath
inherited
¶
Rename this path to the target path.
The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.
Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def rename(self, target: "CloudPath") -> "CloudPath":
    """Move this path to ``target`` by calling ``replace``.

    For cloud services a rename is not distinct from a move, so this is the
    same operation as ``replace``.
    """
    moved = self.replace(target)
    return moved
replace(self, target: CloudPath) -> CloudPath
inherited
¶
Rename this path to the target path, overwriting if that path exists.
The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.
Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def replace(self, target: "CloudPath") -> "CloudPath":
    """Move this path onto ``target`` and return ``target``.

    If ``target`` already exists it is unlinked before the client moves the file.

    Raises:
        TypeError: if ``target`` is not of exactly the same type as this path.
    """
    own_type = type(self)
    if own_type != type(target):
        raise TypeError(
            f"The target based to rename must be an instantiated class of type: {own_type}"
        )

    # clear the destination first so the move lands on a free location
    if target.exists():
        target.unlink()

    self.client._move_file(self, target)
    return target
rglob(self, pattern: str) -> Iterable[CloudPath]
inherited
¶
Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def rglob(self, pattern: str) -> Iterable["CloudPath"]:
    """Glob recursively by prefixing ``pattern`` with ``**/`` and calling ``glob``."""
    recursive_pattern = "**/" + pattern
    return self.glob(recursive_pattern)
rmdir(self)
inherited
¶
Remove this directory. The directory must be empty. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def rmdir(self):
    """Remove this directory through the client; it must not be a file and must be empty.

    Raises:
        CloudPathNotADirectoryError: if this path is a file.
        DirectoryNotEmptyError: if ``iterdir()`` yields at least one entry.
    """
    if self.is_file():
        raise CloudPathNotADirectoryError(
            f"Path {self} is a file; call unlink instead of rmdir."
        )

    try:
        # pulling a single entry is enough to know the directory is not empty
        next(self.iterdir())
    except StopIteration:
        pass
    else:
        raise DirectoryNotEmptyError(
            f"Directory not empty: '{self}'. Use rmtree to delete recursively."
        )

    self.client._remove(self)
rmtree(self)
inherited
¶
Delete an entire directory tree.
Source code in cloudpathlib/local/localpath.py
def rmtree(self):
    """Delete an entire directory tree.

    Raises:
        CloudPathNotADirectoryError: if this path is a file.
    """
    if not self.is_file():
        self.client._remove(self)
        return
    raise CloudPathNotADirectoryError(
        f"Path {self} is a file; call unlink instead of rmtree."
    )
samefile(self, other_path: CloudPath) -> bool
inherited
¶
Return whether other_path is the same or not as this file (as returned by os.path.samefile()). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def samefile(self, other_path: "CloudPath") -> bool:
    """Return whether ``other_path`` compares equal to this path."""
    # all cloud paths are absolute and the paths are used for hash
    is_same = self == other_path
    return is_same
stat(self)
¶
Return the result of the stat() system call on this path, like os.stat() does. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def stat(self):
    """Return whatever the client's ``_stat`` reports for this path.

    Raises:
        NoStatError: if the client raises ``FileNotFoundError``.
    """
    try:
        return self.client._stat(self)
    except FileNotFoundError:
        raise NoStatError(
            f"No stats available for {self}; it may be a directory or not exist."
        )
touch(self)
¶
Create this file with the given access mode, if it doesn't exist. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def touch(self):
    """Forward to the client's ``_touch`` for this path."""
    backend = self.client
    backend._touch(self)
unlink(self)
inherited
¶
Remove this file or link. If the path is a directory, use rmdir() instead. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def unlink(self):
    """Remove the object at this path through the client.

    Raises:
        CloudPathIsADirectoryError: if this path is a directory.
    """
    if not self.is_dir():
        self.client._remove(self)
        return
    raise CloudPathIsADirectoryError(
        f"Path {self} is a directory; call rmdir instead of unlink."
    )
with_name(self, name)
inherited
¶
Return a new path with the file name changed. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def with_name(self, name):
    """Return the result of forwarding ``with_name(name)`` to ``_dispatch_to_path``."""
    operation = "with_name"
    return self._dispatch_to_path(operation, name)
with_suffix(self, suffix)
inherited
¶
Return a new path with the file suffix changed. If the path has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def with_suffix(self, suffix):
    """Return the result of forwarding ``with_suffix(suffix)`` to ``_dispatch_to_path``."""
    operation = "with_suffix"
    return self._dispatch_to_path(operation, suffix)
write_bytes(self, data: bytes)
inherited
¶
Open the file in bytes mode, write to it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def write_bytes(self, data: bytes):
    """Write ``data`` in binary mode and return what the file's ``write`` returns.

    NOTE: vendored from pathlib since we override open
    https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1235-L1242
    """
    # building the memoryview first rejects non-buffer input before the file is opened
    payload = memoryview(data)
    with self.open(mode="wb") as sink:
        written = sink.write(payload)
    return written
write_text(self, data: str, encoding = None, errors = None)
inherited
¶
Open the file in text mode, write to it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/localpath.py
def write_text(self, data: str, encoding=None, errors=None):
    """Write ``data`` in text mode and return what the file's ``write`` returns.

    NOTE: vendored from pathlib since we override open
    https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1244-L1252

    Raises:
        TypeError: if ``data`` is not a ``str``.
    """
    if isinstance(data, str):
        with self.open(mode="w", encoding=encoding, errors=errors) as sink:
            return sink.write(data)
    raise TypeError("data must be str, not %s" % data.__class__.__name__)
LocalS3Client
¶
Replacement for S3Client that uses the local file system. Intended as a monkeypatch substitute when writing tests.
Methods¶
__init__(self, *args, local_cache_dir: Optional[Union[str, os.PathLike]] = None, local_storage_dir: Optional[Union[str, os.PathLike]] = None, **kwargs)
inherited
special
¶
Source code in cloudpathlib/local/implementations/s3.py
def __init__(
    self,
    *args,
    local_cache_dir: Optional[Union[str, os.PathLike]] = None,
    local_storage_dir: Optional[Union[str, os.PathLike]] = None,
    **kwargs,
):
    """Record the local storage directory and initialise the parent client.

    ``local_storage_dir`` falls back to ``get_default_storage_dir()`` when it is
    None. Extra positional and keyword arguments are accepted but not used
    here; only ``local_cache_dir`` is passed to the parent initialiser.
    """
    # setup caching and local versions of file. use default temp dir if not provided
    storage_root = (
        self.get_default_storage_dir() if local_storage_dir is None else local_storage_dir
    )
    self._local_storage_dir = Path(storage_root)

    super().__init__(local_cache_dir=local_cache_dir)
CloudPath(self, cloud_path: Union[str, ~BoundedCloudPath]) -> ~BoundedCloudPath
inherited
¶
Source code in cloudpathlib/local/implementations/s3.py
def CloudPath(self, cloud_path: Union[str, BoundedCloudPath]) -> BoundedCloudPath:
    """Build a path with ``self._cloud_meta.path_class``, passing this client as ``client``."""
    path_class = self._cloud_meta.path_class
    return path_class(cloud_path=cloud_path, client=self)
S3Path(self, cloud_path: Union[str, ~BoundedCloudPath]) -> ~BoundedCloudPath
¶
Source code in cloudpathlib/local/implementations/s3.py
def CloudPath(self, cloud_path: Union[str, BoundedCloudPath]) -> BoundedCloudPath:
    """Build a path with ``self._cloud_meta.path_class``, passing this client as ``client``."""
    path_class = self._cloud_meta.path_class
    return path_class(cloud_path=cloud_path, client=self)
set_as_default_client(self) -> None
inherited
¶
Set this client instance as the default one used when instantiating cloud path instances for this cloud without a client specified.
Source code in cloudpathlib/local/implementations/s3.py
def set_as_default_client(self) -> None:
    """Set this client instance as the default one used when instantiating cloud path
    instances for this cloud without a client specified.

    The instance is stored on its class as ``_default_client``.
    """
    setattr(self.__class__, "_default_client", self)
LocalS3Path
¶
Replacement for S3Path that uses the local file system. Intended as a monkeypatch substitute when writing tests.
Attributes¶
anchor: str
inherited
property
readonly
¶
The concatenation of the drive and root, or ''. (Docstring copied from pathlib.Path)
bucket: str
property
readonly
¶
cloud_prefix: str
¶
drive: str
property
readonly
¶
The drive prefix (letter or UNC path), if any. (Docstring copied from pathlib.Path)
etag
property
readonly
¶
fspath: str
inherited
property
readonly
¶
key: str
property
readonly
¶
name
inherited
property
readonly
¶
The final path component, if any. (Docstring copied from pathlib.Path)
parent
inherited
property
readonly
¶
The logical parent of the path. (Docstring copied from pathlib.Path)
parents
inherited
property
readonly
¶
A sequence of this path's logical parents. (Docstring copied from pathlib.Path)
parts
inherited
property
readonly
¶
An object providing sequence-like access to the components in the filesystem path. (Docstring copied from pathlib.Path)
stem
inherited
property
readonly
¶
The final path component, minus its last suffix. (Docstring copied from pathlib.Path)
suffix
inherited
property
readonly
¶
The final component's last suffix, if any.
This includes the leading period. For example: '.txt' (Docstring copied from pathlib.Path)
suffixes
inherited
property
readonly
¶
A list of the final component's suffixes, if any.
These include the leading periods. For example: ['.tar', '.gz'] (Docstring copied from pathlib.Path)
Methods¶
__init__(self, cloud_path: Union[str, CloudPath], client: Optional[Client] = None)
inherited
special
¶
Source code in cloudpathlib/local/implementations/s3.py
def __init__(self, cloud_path: Union[str, "CloudPath"], client: Optional["Client"] = None):
    """Validate and parse ``cloud_path`` and bind the client this path will use.

    When ``client`` is None, the client of a ``CloudPath`` input is reused;
    otherwise the client class's default client is fetched.

    Raises:
        ClientMismatchError: if the resolved client is not an instance of
            ``self._cloud_meta.client_class``.
    """
    self.is_valid_cloudpath(cloud_path, raise_on_error=True)

    # versions of the raw string that provide useful methods
    self._str = str(cloud_path)
    self._url = urlparse(self._str)
    self._path = PurePosixPath(f"/{self._no_prefix}")

    # setup client
    if client is None:
        client = (
            cloud_path.client
            if isinstance(cloud_path, CloudPath)
            else self._cloud_meta.client_class.get_default_client()
        )

    if not isinstance(client, self._cloud_meta.client_class):
        raise ClientMismatchError(
            f"Client of type [{client.__class__}] is not valid for cloud path of type "
            f"[{self.__class__}]; must be instance of [{self._cloud_meta.client_class}], or "
            f"None to use default client for this cloud path class."
        )
    self.client: Client = client

    # track if local has been written to, if so it may need to be uploaded
    self._dirty = False

    # handle if local file gets opened
    self._handle = None
as_uri(self) -> str
inherited
¶
Return the path as a 'file' URI. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def as_uri(self) -> str:
    """Return the string form of this path."""
    uri = str(self)
    return uri
download_to(self, destination: Union[str, os.PathLike])
inherited
¶
Source code in cloudpathlib/local/implementations/s3.py
def download_to(self, destination: Union[str, os.PathLike]):
    """Download this file, or each entry under this directory, to ``destination``.

    For a file, an existing directory ``destination`` receives the file under
    this path's ``name``. Otherwise ``destination`` is created if missing and
    ``download_to`` is called on every entry from ``iterdir()``, using the part
    of its string form that follows this path as the relative target.
    """
    destination = Path(destination)

    if not self.is_file():
        destination.mkdir(exist_ok=True)
        # children are addressed relative to this path, which must end in "/"
        prefix = str(self)
        if not prefix.endswith("/"):
            prefix = prefix + "/"
        for child in self.iterdir():
            relative_name = str(child)[len(prefix) :]
            child.download_to(destination / relative_name)
        return

    if destination.is_dir():
        destination = destination / self.name
    self.client._download_file(self, destination)
exists(self) -> bool
inherited
¶
Whether this path exists. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def exists(self) -> bool:
    """Return the client's ``_exists`` answer for this path."""
    backend = self.client
    return backend._exists(self)
glob(self, pattern: str) -> Iterable[CloudPath]
inherited
¶
Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def glob(self, pattern: str) -> Iterable["CloudPath"]:
    """Yield entries listed by the client whose ``_no_prefix_no_drive`` matches ``pattern``.

    A leading cloud prefix and a leading ``<drive>/`` are removed from
    ``pattern`` first. A pattern starting with ``**/`` makes the client listing
    recursive and is matched without that marker.
    """
    # strip cloud prefix from pattern if it is included
    scheme = self.cloud_prefix
    if pattern.startswith(scheme):
        pattern = pattern[len(scheme) :]

    # strip "drive" from pattern if it is included
    drive_root = self.drive + "/"
    if pattern.startswith(drive_root):
        pattern = pattern[len(drive_root) :]

    # identify if pattern is recursive or not
    recursive = pattern.startswith("**/")
    if recursive:
        pattern = pattern.split("/", 1)[-1]

    candidates = self.client._list_dir(self, recursive=recursive)
    yield from (
        entry for entry in candidates if fnmatch.fnmatch(entry._no_prefix_no_drive, pattern)
    )
is_dir(self) -> bool
inherited
¶
Whether this path is a directory. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def is_dir(self) -> bool:
    """Return the client's ``_is_dir`` answer for this path."""
    backend = self.client
    return backend._is_dir(self)
is_file(self) -> bool
inherited
¶
Whether this path is a regular file (also True for symlinks pointing to regular files). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def is_file(self) -> bool:
    """Return the client's ``_is_file`` answer for this path."""
    backend = self.client
    return backend._is_file(self)
iterdir(self) -> Iterable[CloudPath]
inherited
¶
Iterate over the files in this directory. Does not yield any result for the special paths '.' and '..'. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def iterdir(self) -> Iterable["CloudPath"]:
    """Yield each entry the client lists for this path, non-recursively."""
    yield from self.client._list_dir(self, recursive=False)
joinpath(self, *args)
inherited
¶
Combine this path with one or several arguments, and return a new path representing either a subpath (if all arguments are relative paths) or a totally different path (if one of the arguments is anchored). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def joinpath(self, *args):
    """Return the result of forwarding ``joinpath(*args)`` to ``_dispatch_to_path``."""
    operation = "joinpath"
    return self._dispatch_to_path(operation, *args)
match(self, path_pattern)
inherited
¶
Return True if this path matches the given pattern. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def match(self, path_pattern):
    """Forward ``match`` to ``_dispatch_to_path`` after removing a leading scheme and drive.

    The prefix removed is ``anchor + drive + "/"``, and only when
    ``path_pattern`` starts with it.
    """
    # strip scheme from start of pattern before testing
    scheme_prefix = self.anchor + self.drive + "/"
    if path_pattern.startswith(scheme_prefix):
        path_pattern = path_pattern[len(scheme_prefix) :]
    return self._dispatch_to_path("match", path_pattern)
mkdir(self, parents = False, exist_ok = False)
¶
Create a new directory at this given path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def mkdir(self, parents=False, exist_ok=False):
    """Do nothing: it is not possible to make an empty directory on S3.

    Both arguments are accepted and ignored.
    """
    return None
open(self, mode = 'r', buffering = -1, encoding = None, errors = None, newline = None, force_overwrite_from_cloud = False, force_overwrite_to_cloud = False) -> IO
inherited
¶
Open the file pointed by this path and return a file object, as the built-in open() function does. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def open(
    self,
    mode="r",
    buffering=-1,
    encoding=None,
    errors=None,
    newline=None,
    force_overwrite_from_cloud=False,  # extra kwarg not in pathlib
    force_overwrite_to_cloud=False,  # extra kwarg not in pathlib
) -> IO:
    """Open the local copy of this file; for write modes, upload when it is closed.

    ``_refresh_cache`` is called first with ``force_overwrite_from_cloud``. For
    modes containing ``w``, ``+``, ``x`` or ``a`` the returned handle's ``close``
    is wrapped so that closing it calls ``_upload_local_to_cloud`` with
    ``force_overwrite_to_cloud``; the handle is also stored on ``_handle`` and
    ``_dirty`` is set.

    Raises:
        CloudPathIsADirectoryError: if the path exists but is not a file.
        CloudPathFileExistsError: if ``mode`` is ``"x"`` and the path exists.
    """
    # if trying to call open on a directory that exists
    if self.exists() and not self.is_file():
        raise CloudPathIsADirectoryError(
            f"Cannot open directory, only files. Tried to open ({self})"
        )

    if mode == "x" and self.exists():
        raise CloudPathFileExistsError(f"Cannot open existing file ({self}) for creation.")

    # TODO: consider streaming from client rather than DLing entire file to cache
    self._refresh_cache(force_overwrite_from_cloud=force_overwrite_from_cloud)

    if self._local.exists():
        cached_mtime = self._local.stat().st_mtime
    else:
        # create any directories that may be needed if the file is new
        self._local.parent.mkdir(parents=True, exist_ok=True)
        cached_mtime = 0

    handle = self._local.open(
        mode=mode,
        buffering=buffering,
        encoding=encoding,
        errors=errors,
        newline=newline,
    )

    # only write modes need special handling when the handle is closed
    opened_for_write = any(flag in mode for flag in "w+xa")
    if not opened_for_write:
        return handle

    close_local_file = handle.close

    # since we are pretending this is a cloud file, upload it to the cloud
    # when the handle is closed
    def _patched_close(*args, **kwargs):
        close_local_file(*args, **kwargs)

        # original mtime should match what was in the cloud; because of system clocks or rounding
        # by the cloud provider, the new version in our cache is "older" than the original version;
        # explicitly set the new modified time to be after the original modified time.
        if self._local.stat().st_mtime < cached_mtime:
            bumped_mtime = cached_mtime + 1
            os.utime(self._local, times=(bumped_mtime, bumped_mtime))

        self._upload_local_to_cloud(force_overwrite_to_cloud=force_overwrite_to_cloud)

    handle.close = _patched_close

    # keep reference in case we need to close when __del__ is called on this object
    self._handle = handle

    # opened for write, so mark dirty
    self._dirty = True

    return handle
read_bytes(self)
inherited
¶
Open the file in bytes mode, read it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def read_bytes(self):
    """Return the file's bytes by forwarding ``read_bytes`` to ``_dispatch_to_local_cache_path``."""
    method_name = "read_bytes"
    return self._dispatch_to_local_cache_path(method_name)
read_text(self)
inherited
¶
Open the file in text mode, read it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def read_text(self):
    """Return the file's text by forwarding ``read_text`` to ``_dispatch_to_local_cache_path``."""
    method_name = "read_text"
    return self._dispatch_to_local_cache_path(method_name)
rename(self, target: CloudPath) -> CloudPath
inherited
¶
Rename this path to the target path.
The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.
Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def rename(self, target: "CloudPath") -> "CloudPath":
    """Move this path to ``target`` by calling ``replace``.

    For cloud services a rename is not distinct from a move, so this is the
    same operation as ``replace``.
    """
    moved = self.replace(target)
    return moved
replace(self, target: CloudPath) -> CloudPath
inherited
¶
Rename this path to the target path, overwriting if that path exists.
The target path may be absolute or relative. Relative paths are interpreted relative to the current working directory, not the directory of the Path object.
Returns the new Path instance pointing to the target path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def replace(self, target: "CloudPath") -> "CloudPath":
    """Move this path onto ``target`` and return ``target``.

    If ``target`` already exists it is unlinked before the client moves the file.

    Raises:
        TypeError: if ``target`` is not of exactly the same type as this path.
    """
    own_type = type(self)
    if own_type != type(target):
        raise TypeError(
            f"The target based to rename must be an instantiated class of type: {own_type}"
        )

    # clear the destination first so the move lands on a free location
    if target.exists():
        target.unlink()

    self.client._move_file(self, target)
    return target
rglob(self, pattern: str) -> Iterable[CloudPath]
inherited
¶
Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def rglob(self, pattern: str) -> Iterable["CloudPath"]:
    """Glob recursively by prefixing ``pattern`` with ``**/`` and calling ``glob``."""
    recursive_pattern = "**/" + pattern
    return self.glob(recursive_pattern)
rmdir(self)
inherited
¶
Remove this directory. The directory must be empty. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def rmdir(self):
    """Remove this directory; the directory must be empty.

    Raises:
        CloudPathNotADirectoryError: If this path is a file.
        DirectoryNotEmptyError: If `iterdir` yields at least one entry.
    """
    if self.is_file():
        raise CloudPathNotADirectoryError(
            f"Path {self} is a file; call unlink instead of rmdir."
        )

    # Pull a single entry: only emptiness matters, not the full listing.
    try:
        next(self.iterdir())
    except StopIteration:
        # Nothing was yielded, so the directory is empty.
        pass
    else:
        raise DirectoryNotEmptyError(
            f"Directory not empty: '{self}'. Use rmtree to delete recursively."
        )

    self.client._remove(self)
rmtree(self)
inherited
¶
Delete an entire directory tree.
Source code in cloudpathlib/local/implementations/s3.py
def rmtree(self):
    """Delete an entire directory tree.

    Raises:
        CloudPathNotADirectoryError: If this path is a file.
    """
    if not self.is_file():
        # Not a file: hand the whole path to the client for removal.
        self.client._remove(self)
        return

    raise CloudPathNotADirectoryError(
        f"Path {self} is a file; call unlink instead of rmtree."
    )
samefile(self, other_path: CloudPath) -> bool
inherited
¶
Return whether other_path is the same or not as this file (as returned by os.path.samefile()). (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def samefile(self, other_path: "CloudPath") -> bool:
    """Return whether `other_path` is the same as this path.

    The answer is the result of the equality comparison
    `self == other_path`.
    """
    # Every cloud path is absolute and the path is what gets hashed, so
    # comparing for equality is sufficient here.
    is_same = self == other_path
    return is_same
stat(self)
inherited
¶
Return the result of the stat() system call on this path, like os.stat() does. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def stat(self):
    """Return the metadata the client reports for this path.

    Returns:
        Whatever `self.client._stat(self)` returns.

    Raises:
        NoStatError: If the client raises `FileNotFoundError` for this path.
    """
    try:
        return self.client._stat(self)
    except FileNotFoundError:
        raise NoStatError(
            f"No stats available for {self}; it may be a directory or not exist."
        )
touch(self)
inherited
¶
Create this file with the given access mode, if it doesn't exist. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def touch(self):
    """Create this file by delegating to the client's `_touch`.

    Nothing is returned.
    """
    backend = self.client
    backend._touch(self)
unlink(self)
inherited
¶
Remove this file or link. If the path is a directory, use rmdir() instead. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def unlink(self):
    """Remove this file.

    Raises:
        CloudPathIsADirectoryError: If this path is a directory; use
            `rmdir` for directories.
    """
    if not self.is_dir():
        # Not a directory: let the client remove it.
        self.client._remove(self)
        return

    raise CloudPathIsADirectoryError(
        f"Path {self} is a directory; call rmdir instead of unlink."
    )
with_name(self, name)
inherited
¶
Return a new path with the file name changed. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def with_name(self, name):
    """Return a new path with the file name changed to `name`.

    The work is forwarded to `_dispatch_to_path` under the method name
    "with_name"; its result is returned as-is.
    """
    renamed = self._dispatch_to_path("with_name", name)
    return renamed
with_suffix(self, suffix)
inherited
¶
Return a new path with the file suffix changed. If the path has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def with_suffix(self, suffix):
    """Return a new path with the file suffix changed to `suffix`.

    The work is forwarded to `_dispatch_to_path` under the method name
    "with_suffix"; its result is returned as-is.
    """
    resuffixed = self._dispatch_to_path("with_suffix", suffix)
    return resuffixed
write_bytes(self, data: bytes)
inherited
¶
Open the file in bytes mode, write to it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def write_bytes(self, data: bytes):
    """Open the file in bytes mode, write `data` to it, and close the file.

    NOTE: vendored from pathlib since we override open
    https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1235-L1242

    Returns:
        The value returned by the file object's `write` call.

    Raises:
        TypeError: If `data` does not support the buffer interface.
    """
    # Build the memoryview first so unsupported input is rejected before
    # the file is opened for writing (and therefore truncated).
    buffer = memoryview(data)
    with self.open(mode="wb") as handle:
        written = handle.write(buffer)
    return written
write_text(self, data: str, encoding = None, errors = None)
inherited
¶
Open the file in text mode, write to it, and close the file. (Docstring copied from pathlib.Path)
Source code in cloudpathlib/local/implementations/s3.py
def write_text(self, data: str, encoding=None, errors=None):
    """Open the file in text mode, write `data` to it, and close the file.

    NOTE: vendored from pathlib since we override open
    https://github.com/python/cpython/blob/3.8/Lib/pathlib.py#L1244-L1252

    Args:
        data: Text to write.
        encoding: Passed through to `open`.
        errors: Passed through to `open`.

    Returns:
        The value returned by the file object's `write` call.

    Raises:
        TypeError: If `data` is not a `str`.
    """
    if isinstance(data, str):
        with self.open(mode="w", encoding=encoding, errors=errors) as handle:
            written = handle.write(data)
        return written

    raise TypeError("data must be str, not %s" % data.__class__.__name__)