Source code for secretflow.data.io.util

# Copyright 2022 Ant Group Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import builtins
from pathlib import Path
from typing import Union
from urllib.parse import urlparse

import pandas as pd

import secretflow.data.io.oss as oss


def open(filepath: Union[str, Path], mode='rb'):
    """Open a local or oss file.

    Only the ``oss`` scheme is special-cased; every other string (with or
    without a scheme) is handed to :py:func:`builtins.open`.

    Args:
        filepath: The file path. A ``str`` is opened; any other object
            (for example a ``pathlib.Path``) is returned unchanged.
        mode: optional. open mode.

    Returns:
        the file object for a ``str`` path, otherwise ``filepath`` itself.
    """
    if isinstance(filepath, str):
        # Pick the backend from the URI scheme, then open with the same args.
        opener = oss.open if urlparse(filepath).scheme == 'oss' else builtins.open
        return opener(filepath, mode)
    # Non-str inputs are passed through untouched.
    return filepath
def is_local_file(uri: Union[str, Path]) -> bool:
    """Tell whether ``uri`` refers to a path on the local filesystem.

    A string counts as local when it is non-empty and
    :py:func:`urllib.parse.urlparse` finds no scheme in it, so ``oss://...``
    or ``http://...`` are not local. A ``pathlib.Path`` is always local.

    Args:
        uri: the path or URI to inspect.

    Returns:
        True if ``uri`` is a local path; False otherwise, including for an
        empty string or None.
    """
    if isinstance(uri, Path):
        # urlparse() cannot handle Path objects, and a Path is local by nature.
        return True
    # NOTE(review): a drive-letter path such as "C:/data.csv" parses with
    # scheme "c" and is therefore reported as non-local -- confirm whether
    # Windows paths need to be supported here.
    # bool() keeps the declared return type: a bare `uri and ...` would hand
    # back '' or None for falsy input.
    return bool(uri) and not urlparse(uri).scheme
def read_csv_wrapper(filepath, **kwargs) -> pd.DataFrame:
    """A wrapper of pandas read_csv and supports oss file.

    Args:
        filepath: the file path. A ``str`` is opened through this module's
            ``open``; any other object is handed to pandas unchanged.
        kwargs: all other arguments are same with :py:func:`pandas.read_csv`.

    Returns:
        a pandas DataFrame. If ``kwargs`` make pandas return a lazy reader
        instead (e.g. ``chunksize``), that reader is returned as-is.
    """
    if not isinstance(filepath, str):
        # open() passes non-str inputs through untouched, so this function
        # owns no handle that would need closing.
        return pd.read_csv(filepath, **kwargs)

    handle = open(filepath)
    try:
        result = pd.read_csv(handle, **kwargs)
    except BaseException:
        # Do not leak the handle when parsing fails.
        handle.close()
        raise
    if isinstance(result, pd.DataFrame):
        # The data is fully loaded, so the handle is no longer needed.
        # A lazy reader still pulls from the handle, so it is left open then.
        handle.close()
    return result
def to_csv_wrapper(df: pd.DataFrame, filepath, **kwargs):
    """A wrapper of pandas to_csv and supports oss file.

    Missing parent directories are created first when the destination is a
    local path.

    Args:
        df: the DataFrame to write.
        filepath: the file path. A ``str`` is opened through this module's
            ``open`` in ``'wb'`` mode; any other object (``pathlib.Path`` or
            a writable buffer) is handed to pandas unchanged.
        kwargs: all other arguments are same with
            :py:meth:`pandas.DataFrame.to_csv`.

    Returns:
        None.
    """
    # is_local_file() only understands strings, so guard the call; a Path is
    # always local, and file-like targets have no directory to create.
    if isinstance(filepath, Path) or (
        isinstance(filepath, str) and is_local_file(filepath)
    ):
        Path(filepath).parent.mkdir(parents=True, exist_ok=True)

    if not isinstance(filepath, str):
        # open() would return this object unchanged; let pandas handle it.
        df.to_csv(filepath, **kwargs)
        return

    # Close the handle deterministically so the data is flushed before
    # returning instead of whenever the handle is garbage-collected.
    with open(filepath, 'wb') as handle:
        df.to_csv(handle, **kwargs)