1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
use std::str::FromStr;

#[cfg(feature = "aws")]
use object_store::aws::AmazonS3Builder;
#[cfg(feature = "aws")]
pub use object_store::aws::AmazonS3ConfigKey;
#[cfg(feature = "azure")]
pub use object_store::azure::AzureConfigKey;
#[cfg(feature = "azure")]
use object_store::azure::MicrosoftAzureBuilder;
#[cfg(feature = "gcp")]
use object_store::gcp::GoogleCloudStorageBuilder;
#[cfg(feature = "gcp")]
pub use object_store::gcp::GoogleConfigKey;
#[cfg(feature = "async")]
use object_store::ObjectStore;
#[cfg(feature = "serde-lazy")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "async")]
use url::Url;

use crate::error::{PolarsError, PolarsResult};

/// The type of the config keys must satisfy the following requirements:
/// 1. must be easily collected into a HashMap, the type required by the object_crate API.
/// 2. be Serializable, required when the serde-lazy feature is defined.
/// 3. not actually use HashMap since that type is disallowed in Polars for performance reasons.
///
/// Currently this type is a vector of pairs config key - config value.
#[allow(dead_code)]
type Configs<T> = Vec<(T, String)>;

#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde-lazy", derive(Serialize, Deserialize))]
/// Options to conect to various cloud providers.
pub struct CloudOptions {
    #[cfg(feature = "aws")]
    aws: Option<Configs<AmazonS3ConfigKey>>,
    #[cfg(feature = "azure")]
    azure: Option<Configs<AzureConfigKey>>,
    #[cfg(feature = "gcp")]
    gcp: Option<Configs<GoogleConfigKey>>,
}

#[allow(dead_code)]
/// Parse an untype configuration hashmap to a typed configuration for the given configuration key type.
fn parsed_untyped_config<T, I: IntoIterator<Item = (impl AsRef<str>, impl Into<String>)>>(
    config: I,
) -> PolarsResult<Configs<T>>
where
    T: FromStr + std::cmp::Eq + std::hash::Hash,
{
    config
        .into_iter()
        .map(|(key, val)| {
            T::from_str(key.as_ref())
                .map_err(|_e| {
                    PolarsError::ComputeError(
                        format!("Unknown configuration key {}.", key.as_ref()).into(),
                    )
                })
                .map(|typed_key| (typed_key, val.into()))
        })
        .collect::<PolarsResult<Configs<T>>>()
}

pub enum CloudType {
    Aws,
    Azure,
    File,
    Gcp,
}

impl FromStr for CloudType {
    type Err = PolarsError;

    #[cfg(feature = "async")]
    fn from_str(url: &str) -> Result<Self, Self::Err> {
        let parsed = Url::parse(url).map_err(anyhow::Error::from)?;
        match parsed.scheme() {
            "s3" => Ok(Self::Aws),
            "az" | "adl" | "abfs" => Ok(Self::Azure),
            "gs" | "gcp" => Ok(Self::Gcp),
            "file" => Ok(Self::File),
            &_ => Err(PolarsError::ComputeError("Unknown url scheme.".into())),
        }
    }

    #[cfg(not(feature = "async"))]
    fn from_str(_s: &str) -> Result<Self, Self::Err> {
        Err(PolarsError::ComputeError(
            "At least one of the cloud features must be enabled.".into(),
        ))
    }
}

impl CloudOptions {
    /// Set the configuration for AWS connections. This is the preferred API from rust.
    #[cfg(feature = "aws")]
    pub fn with_aws<I: IntoIterator<Item = (AmazonS3ConfigKey, impl Into<String>)>>(
        mut self,
        configs: I,
    ) -> Self {
        self.aws = Some(
            configs
                .into_iter()
                .map(|(k, v)| (k, v.into()))
                .collect::<Configs<AmazonS3ConfigKey>>(),
        );
        self
    }

    /// Build the ObjectStore implementation for AWS.
    #[cfg(feature = "aws")]
    pub fn build_aws(&self, bucket_name: &str) -> PolarsResult<impl ObjectStore> {
        let options = self.aws.as_ref().map(Ok).unwrap_or_else(|| {
            Err(PolarsError::ComputeError(
                "`aws` configuration missing.".into(),
            ))
        })?;
        AmazonS3Builder::new()
            .try_with_options(options.clone().into_iter())
            .and_then(|b| b.with_bucket_name(bucket_name).build())
            .map_err(|e| PolarsError::ComputeError(e.to_string().into()))
    }

    /// Set the configuration for Azure connections. This is the preferred API from rust.
    #[cfg(feature = "azure")]
    pub fn with_azure<I: IntoIterator<Item = (AzureConfigKey, impl Into<String>)>>(
        mut self,
        configs: I,
    ) -> Self {
        self.azure = Some(
            configs
                .into_iter()
                .map(|(k, v)| (k, v.into()))
                .collect::<Configs<AzureConfigKey>>(),
        );
        self
    }

    /// Build the ObjectStore implementation for Azure.
    #[cfg(feature = "azure")]
    pub fn build_azure(&self, container_name: &str) -> PolarsResult<impl ObjectStore> {
        let options = self.azure.as_ref().map(Ok).unwrap_or_else(|| {
            Err(PolarsError::ComputeError(
                "`azure` configuration missing.".into(),
            ))
        })?;
        MicrosoftAzureBuilder::new()
            .try_with_options(options.clone().into_iter())
            .and_then(|b| b.with_container_name(container_name).build())
            .map_err(|e| PolarsError::ComputeError(e.to_string().into()))
    }

    /// Set the configuration for GCP connections. This is the preferred API from rust.
    #[cfg(feature = "gcp")]
    pub fn with_gcp<I: IntoIterator<Item = (GoogleConfigKey, impl Into<String>)>>(
        mut self,
        configs: I,
    ) -> Self {
        self.gcp = Some(
            configs
                .into_iter()
                .map(|(k, v)| (k, v.into()))
                .collect::<Configs<GoogleConfigKey>>(),
        );
        self
    }

    /// Build the ObjectStore implementation for GCP.
    #[cfg(feature = "gcp")]
    pub fn build_gcp(&self, bucket_name: &str) -> PolarsResult<impl ObjectStore> {
        let options = self.gcp.as_ref().map(Ok).unwrap_or_else(|| {
            Err(PolarsError::ComputeError(
                "`gcp` configuration missing.".into(),
            ))
        })?;
        GoogleCloudStorageBuilder::new()
            .try_with_options(options.clone().into_iter())
            .and_then(|b| b.with_bucket_name(bucket_name).build())
            .map_err(|e| PolarsError::ComputeError(e.to_string().into()))
    }

    /// Parse a configuration from a Hashmap. This is the interface from Python.
    #[allow(unused_variables)]
    pub fn from_untyped_config<I: IntoIterator<Item = (impl AsRef<str>, impl Into<String>)>>(
        url: &str,
        config: I,
    ) -> PolarsResult<Self> {
        match CloudType::from_str(url)? {
            CloudType::Aws => {
                #[cfg(feature = "aws")]
                {
                    parsed_untyped_config::<AmazonS3ConfigKey, _>(config)
                        .map(|aws| Self::default().with_aws(aws))
                }
                #[cfg(not(feature = "aws"))]
                {
                    Err(PolarsError::ComputeError(
                        "Feature aws is not enabled.".into(),
                    ))
                }
            }
            CloudType::Azure => {
                #[cfg(feature = "azure")]
                {
                    parsed_untyped_config::<AzureConfigKey, _>(config)
                        .map(|azure| Self::default().with_azure(azure))
                }
                #[cfg(not(feature = "azure"))]
                {
                    Err(PolarsError::ComputeError(
                        "Feature gcp is not enabled.".into(),
                    ))
                }
            }
            CloudType::File => Ok(Self::default()),
            CloudType::Gcp => {
                #[cfg(feature = "gcp")]
                {
                    parsed_untyped_config::<GoogleConfigKey, _>(config)
                        .map(|gcp| Self::default().with_gcp(gcp))
                }
                #[cfg(not(feature = "gcp"))]
                {
                    Err(PolarsError::ComputeError(
                        "Feature gcp is not enabled.".into(),
                    ))
                }
            }
        }
    }
}