Struct polars_lazy::frame::LazyFrame
source · pub struct LazyFrame {
pub logical_plan: LogicalPlan,
/* private fields */
}
Expand description
Lazy abstraction over an eager DataFrame.
It really is an abstraction over a logical plan. The methods of this struct will incrementally
modify a logical plan until output is requested (via collect).
Fields§
§logical_plan: LogicalPlan
Implementations§
source§impl LazyFrame
impl LazyFrame
sourcepub fn to_dot(&self, optimized: bool) -> PolarsResult<String>
Available on crate feature dot_diagram
only.
pub fn to_dot(&self, optimized: bool) -> PolarsResult<String>
Available on crate feature dot_diagram only.
Get a dot language representation of the LogicalPlan.
source§impl LazyFrame
impl LazyFrame
sourcepub fn scan_ipc(path: impl AsRef<Path>, args: ScanArgsIpc) -> PolarsResult<Self>
Available on crate feature ipc
only.
pub fn scan_ipc(path: impl AsRef<Path>, args: ScanArgsIpc) -> PolarsResult<Self>
Available on crate feature ipc only.
Create a LazyFrame directly from an IPC scan.
source§impl LazyFrame
impl LazyFrame
sourcepub fn scan_parquet_files<P: AsRef<Path>>(
paths: Vec<P>,
args: ScanArgsParquet
) -> PolarsResult<Self>
👎Deprecated: please use concat_lf instead.
Available on crate feature parquet only.
pub fn scan_parquet_files<P: AsRef<Path>>(
paths: Vec<P>,
args: ScanArgsParquet
) -> PolarsResult<Self>
Deprecated: please use concat_lf instead.
Available on crate feature parquet only.
Create a LazyFrame directly from a parquet scan.
sourcepub fn scan_parquet(
path: impl AsRef<Path>,
args: ScanArgsParquet
) -> PolarsResult<Self>
Available on crate feature parquet
only.
pub fn scan_parquet(
path: impl AsRef<Path>,
args: ScanArgsParquet
) -> PolarsResult<Self>
Available on crate feature parquet only.
Create a LazyFrame directly from a parquet scan.
source§impl LazyFrame
impl LazyFrame
pub fn anonymous_scan(
function: Arc<dyn AnonymousScan>,
args: ScanArgsAnonymous
) -> PolarsResult<Self>
source§impl LazyFrame
impl LazyFrame
sourcepub fn schema(&self) -> PolarsResult<SchemaRef>
pub fn schema(&self) -> PolarsResult<SchemaRef>
Get a hold on the schema of the current LazyFrame computation.
sourcepub fn with_optimizations(self, opt_state: OptState) -> Self
pub fn with_optimizations(self, opt_state: OptState) -> Self
Set allowed optimizations
sourcepub fn without_optimizations(self) -> Self
pub fn without_optimizations(self) -> Self
Turn off all optimizations
sourcepub fn with_projection_pushdown(self, toggle: bool) -> Self
pub fn with_projection_pushdown(self, toggle: bool) -> Self
Toggle projection pushdown optimization.
sourcepub fn with_predicate_pushdown(self, toggle: bool) -> Self
pub fn with_predicate_pushdown(self, toggle: bool) -> Self
Toggle predicate pushdown optimization.
sourcepub fn with_type_coercion(self, toggle: bool) -> Self
pub fn with_type_coercion(self, toggle: bool) -> Self
Toggle type coercion optimization.
sourcepub fn with_simplify_expr(self, toggle: bool) -> Self
pub fn with_simplify_expr(self, toggle: bool) -> Self
Toggle expression simplification optimization on or off
sourcepub fn with_slice_pushdown(self, toggle: bool) -> Self
pub fn with_slice_pushdown(self, toggle: bool) -> Self
Toggle slice pushdown optimization
sourcepub fn with_streaming(self, toggle: bool) -> Self
pub fn with_streaming(self, toggle: bool) -> Self
Allow (partial) streaming engine
sourcepub fn describe_plan(&self) -> String
pub fn describe_plan(&self) -> String
Describe the logical plan.
sourcepub fn describe_optimized_plan(&self) -> PolarsResult<String>
pub fn describe_optimized_plan(&self) -> PolarsResult<String>
Describe the optimized logical plan.
sourcepub fn sort(self, by_column: &str, options: SortOptions) -> Self
pub fn sort(self, by_column: &str, options: SortOptions) -> Self
Add a sort operation to the logical plan.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
/// Sort DataFrame by 'sepal.width' column
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.sort("sepal.width", Default::default())
}
sourcepub fn sort_by_exprs<E: AsRef<[Expr]>, B: AsRef<[bool]>>(
self,
by_exprs: E,
reverse: B,
nulls_last: bool
) -> Self
pub fn sort_by_exprs<E: AsRef<[Expr]>, B: AsRef<[bool]>>(
self,
by_exprs: E,
reverse: B,
nulls_last: bool
) -> Self
Add a sort operation to the logical plan.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
/// Sort DataFrame by 'sepal.width' column
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.sort_by_exprs(vec![col("sepal.width")], vec![false], false)
}
sourcepub fn reverse(self) -> Self
pub fn reverse(self) -> Self
Reverse the DataFrame
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.reverse()
}
sourcepub fn rename<I, J, T, S>(self, existing: I, new: J) -> Selfwhere
I: IntoIterator<Item = T>,
J: IntoIterator<Item = S>,
T: AsRef<str>,
S: AsRef<str>,
pub fn rename<I, J, T, S>(self, existing: I, new: J) -> Selfwhere
I: IntoIterator<Item = T>,
J: IntoIterator<Item = S>,
T: AsRef<str>,
S: AsRef<str>,
Rename columns in the DataFrame.
sourcepub fn drop_columns<I, T>(self, columns: I) -> Selfwhere
I: IntoIterator<Item = T>,
T: AsRef<str>,
pub fn drop_columns<I, T>(self, columns: I) -> Selfwhere
I: IntoIterator<Item = T>,
T: AsRef<str>,
Removes columns from the DataFrame. Note that it's better to only select the columns you need and let the projection pushdown optimize away the unneeded columns.
sourcepub fn shift(self, periods: i64) -> Self
pub fn shift(self, periods: i64) -> Self
Shift the values by a given period and fill the parts that will be empty due to this operation
with Nones.
See the method on Series for more info on the shift operation.
sourcepub fn shift_and_fill<E: Into<Expr>>(self, periods: i64, fill_value: E) -> Self
pub fn shift_and_fill<E: Into<Expr>>(self, periods: i64, fill_value: E) -> Self
Shift the values by a given period and fill the parts that will be empty due to this operation
with the result of the fill_value expression.
See the method on Series for more info on the shift operation.
sourcepub fn fill_null<E: Into<Expr>>(self, fill_value: E) -> LazyFrame
pub fn fill_null<E: Into<Expr>>(self, fill_value: E) -> LazyFrame
Fill null values in the DataFrame.
sourcepub fn fill_nan<E: Into<Expr>>(self, fill_value: E) -> LazyFrame
pub fn fill_nan<E: Into<Expr>>(self, fill_value: E) -> LazyFrame
Fill NaN values in the DataFrame
sourcepub fn cache(self) -> Self
pub fn cache(self) -> Self
Caches the result into a new LazyFrame. This should be used to prevent computations from running multiple times.
sourcepub fn fetch(self, n_rows: usize) -> PolarsResult<DataFrame>
pub fn fetch(self, n_rows: usize) -> PolarsResult<DataFrame>
Fetch is like a collect operation, but it overwrites the number of rows read by every scan operation. This is a utility that helps debug a query on a smaller number of rows.
Note that the fetch does not guarantee the final number of rows in the DataFrame. Filter, join operations and a lower number of rows available in the scanned file influence the final number of rows.
pub fn optimize(
self,
lp_arena: &mut Arena<ALogicalPlan>,
expr_arena: &mut Arena<AExpr>
) -> PolarsResult<Node>
sourcepub fn collect(self) -> PolarsResult<DataFrame>
pub fn collect(self) -> PolarsResult<DataFrame>
Execute all the lazy operations and collect them into a DataFrame.
The query is optimized before execution.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: DataFrame) -> PolarsResult<DataFrame> {
df.lazy()
.groupby([col("foo")])
.agg([col("bar").sum(), col("ham").mean().alias("avg_ham")])
.collect()
}
pub fn profile(self) -> PolarsResult<(DataFrame, DataFrame)>
sourcepub fn sink_parquet(
self,
path: PathBuf,
options: ParquetWriteOptions
) -> PolarsResult<()>
Available on crate feature parquet
only.
pub fn sink_parquet(
self,
path: PathBuf,
options: ParquetWriteOptions
) -> PolarsResult<()>
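Available on crate feature parquet only.
Stream a query result into a parquet file. This is useful if the final result doesn't fit into memory. This method will return an error if the query cannot be completely done in a streaming fashion.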
sourcepub fn filter(self, predicate: Expr) -> Self
pub fn filter(self, predicate: Expr) -> Self
Filter by some predicate expression.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.filter(col("sepal.width").is_not_null())
.select(&[col("sepal.width"), col("sepal.length")])
}
sourcepub fn select<E: AsRef<[Expr]>>(self, exprs: E) -> Self
pub fn select<E: AsRef<[Expr]>>(self, exprs: E) -> Self
Select (and rename) columns from the query.
Columns can be selected with col.
If you want to select all columns, use col("*").
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
/// This function selects column "foo" and column "bar".
/// Column "bar" is renamed to "ham".
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.select(&[col("foo"),
col("bar").alias("ham")])
}
/// This function selects all columns except "foo"
fn exclude_a_column(df: DataFrame) -> LazyFrame {
df.lazy()
.select(&[col("*").exclude(["foo"])])
}
sourcepub fn groupby<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
self,
by: E
) -> LazyGroupBy
pub fn groupby<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
self,
by: E
) -> LazyGroupBy
Group by and aggregate.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
use polars_arrow::prelude::QuantileInterpolOptions;
fn example(df: DataFrame) -> LazyFrame {
df.lazy()
.groupby([col("date")])
.agg([
col("rain").min(),
col("rain").sum(),
col("rain").quantile(lit(0.5), QuantileInterpolOptions::Nearest).alias("median_rain"),
])
}
sourcepub fn groupby_stable<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
self,
by: E
) -> LazyGroupBy
pub fn groupby_stable<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
self,
by: E
) -> LazyGroupBy
Similar to groupby, but the order of the DataFrame is maintained.
sourcepub fn left_join<E: Into<Expr>>(
self,
other: LazyFrame,
left_on: E,
right_on: E
) -> LazyFrame
pub fn left_join<E: Into<Expr>>(
self,
other: LazyFrame,
left_on: E,
right_on: E
) -> LazyFrame
Join query with other lazy query.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
ldf
.left_join(other, col("foo"), col("bar"))
}
sourcepub fn outer_join<E: Into<Expr>>(
self,
other: LazyFrame,
left_on: E,
right_on: E
) -> LazyFrame
pub fn outer_join<E: Into<Expr>>(
self,
other: LazyFrame,
left_on: E,
right_on: E
) -> LazyFrame
Join query with other lazy query.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
ldf
.outer_join(other, col("foo"), col("bar"))
}
sourcepub fn inner_join<E: Into<Expr>>(
self,
other: LazyFrame,
left_on: E,
right_on: E
) -> LazyFrame
pub fn inner_join<E: Into<Expr>>(
self,
other: LazyFrame,
left_on: E,
right_on: E
) -> LazyFrame
Join query with other lazy query.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
ldf
.inner_join(other, col("foo"), col("bar").cast(DataType::Utf8))
}
sourcepub fn cross_join(self, other: LazyFrame) -> LazyFrame
Available on crate feature cross_join
only.
pub fn cross_join(self, other: LazyFrame) -> LazyFrame
Available on crate feature cross_join only.
Creates the cartesian product from both frames, preserving the order of the left keys.
sourcepub fn join<E: AsRef<[Expr]>>(
self,
other: LazyFrame,
left_on: E,
right_on: E,
how: JoinType
) -> LazyFrame
pub fn join<E: AsRef<[Expr]>>(
self,
other: LazyFrame,
left_on: E,
right_on: E,
how: JoinType
) -> LazyFrame
Generic join function that can join on multiple columns.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn example(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
ldf
.join(other, [col("foo"), col("bar")], [col("foo"), col("bar")], JoinType::Inner)
}
sourcepub fn join_builder(self) -> JoinBuilder
pub fn join_builder(self) -> JoinBuilder
Control more join options with the join builder.
sourcepub fn with_column(self, expr: Expr) -> LazyFrame
pub fn with_column(self, expr: Expr) -> LazyFrame
Add a column to a DataFrame
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn add_column(df: DataFrame) -> LazyFrame {
df.lazy()
.with_column(
when(col("sepal.length").lt(lit(5.0)))
.then(lit(10))
.otherwise(lit(1))
.alias("new_column_name"),
)
}
sourcepub fn with_columns<E: AsRef<[Expr]>>(self, exprs: E) -> LazyFrame
pub fn with_columns<E: AsRef<[Expr]>>(self, exprs: E) -> LazyFrame
Add multiple columns to a DataFrame.
Example
use polars_core::prelude::*;
use polars_lazy::prelude::*;
fn add_columns(df: DataFrame) -> LazyFrame {
df.lazy()
.with_columns(
vec![lit(10).alias("foo"), lit(100).alias("bar")]
)
}
pub fn with_context<C: AsRef<[LazyFrame]>>(self, contexts: C) -> LazyFrame
sourcepub fn quantile(
self,
quantile: Expr,
interpol: QuantileInterpolOptions
) -> LazyFrame
pub fn quantile(
self,
quantile: Expr,
interpol: QuantileInterpolOptions
) -> LazyFrame
Aggregate all the columns as their quantile values.
sourcepub fn std(self, ddof: u8) -> LazyFrame
pub fn std(self, ddof: u8) -> LazyFrame
Aggregate all the columns as their standard deviation values.
sourcepub fn explode<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
self,
columns: E
) -> LazyFrame
pub fn explode<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(
self,
columns: E
) -> LazyFrame
Apply explode operation. See eager explode.
sourcepub fn unique_stable(
self,
subset: Option<Vec<String>>,
keep_strategy: UniqueKeepStrategy
) -> LazyFrame
pub fn unique_stable(
self,
subset: Option<Vec<String>>,
keep_strategy: UniqueKeepStrategy
) -> LazyFrame
Keep unique rows and maintain order
sourcepub fn unique(
self,
subset: Option<Vec<String>>,
keep_strategy: UniqueKeepStrategy
) -> LazyFrame
pub fn unique(
self,
subset: Option<Vec<String>>,
keep_strategy: UniqueKeepStrategy
) -> LazyFrame
Keep unique rows, do not maintain order
sourcepub fn drop_nulls(self, subset: Option<Vec<Expr>>) -> LazyFrame
pub fn drop_nulls(self, subset: Option<Vec<Expr>>) -> LazyFrame
Drop null rows.
Equal to LazyFrame::filter(col("*").is_not_null())
sourcepub fn limit(self, n: IdxSize) -> LazyFrame
pub fn limit(self, n: IdxSize) -> LazyFrame
Limit the DataFrame to the first n rows. Note that if you don’t want the rows to be scanned,
use fetch.
sourcepub fn map<F>(
self,
function: F,
optimizations: Option<AllowedOptimizations>,
schema: Option<Arc<dyn UdfSchema>>,
name: Option<&'static str>
) -> LazyFramewhere
F: 'static + Fn(DataFrame) -> PolarsResult<DataFrame> + Send + Sync,
pub fn map<F>(
self,
function: F,
optimizations: Option<AllowedOptimizations>,
schema: Option<Arc<dyn UdfSchema>>,
name: Option<&'static str>
) -> LazyFramewhere
F: 'static + Fn(DataFrame) -> PolarsResult<DataFrame> + Send + Sync,
Apply a function/closure once the logical plan gets executed.
Warning
This can blow up in your face if the schema is changed due to the operation. The optimizer relies on a correct schema.
You can toggle certain optimizations off.
sourcepub fn with_row_count(self, name: &str, offset: Option<IdxSize>) -> LazyFrame
pub fn with_row_count(self, name: &str, offset: Option<IdxSize>) -> LazyFrame
Add a new column at index 0 that counts the rows.
Warning
This can have a negative effect on query performance. This may for instance block predicate pushdown optimization.
sourcepub fn unnest<I: IntoIterator<Item = S>, S: AsRef<str>>(self, cols: I) -> Self
Available on crate feature dtype-struct
only.
pub fn unnest<I: IntoIterator<Item = S>, S: AsRef<str>>(self, cols: I) -> Self
Available on crate feature dtype-struct only.
Unnest the given Struct columns. This means that the fields of the Struct type will be
inserted as columns.