Manipulation

TableFrame.cast(
dtypes: Mapping[td_typing.ColumnNameOrSelector | td_typing.DataType, td_typing.DataType] | td_typing.DataType,
*,
strict: bool = True,
) -> TableFrame
Categories:

manipulation

Cast columns to a new data type.

Parameters:
  • dtypes – Mapping of column name(s), selector(s), or source data type(s) to the new data type(s); alternatively, a single data type to which all columns are cast.

  • strict – If True, raises an error if the cast cannot be performed.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
┌──────┬──────┐
│ a    ┆ b    │
│ ---  ┆ ---  │
│ str  ┆ i64  │
╞══════╪══════╡
│ A    ┆ 1    │
│ X    ┆ 10   │
│ C    ┆ 3    │
│ D    ┆ 5    │
│ M    ┆ 9    │
│ A    ┆ 100  │
│ M    ┆ 50   │
│ null ┆ 20   │
│ F    ┆ null │
└──────┴──────┘
>>>
>>> tf.cast({"b":td.Float32}).collect()
>>>
┌──────┬───────┐
│ a    ┆ b     │
│ ---  ┆ ---   │
│ str  ┆ f32   │
╞══════╪═══════╡
│ A    ┆ 1.0   │
│ X    ┆ 10.0  │
│ C    ┆ 3.0   │
│ D    ┆ 5.0   │
│ M    ┆ 9.0   │
│ A    ┆ 100.0 │
│ M    ┆ 50.0  │
│ null ┆ 20.0  │
│ F    ┆ null  │
└──────┴───────┘
TableFrame.drop_nans(
subset: td_typing.ColumnNameOrSelector | Collection[td_typing.ColumnNameOrSelector] | None = None,
) -> TableFrame
Categories:

manipulation

Drop rows with NaN values.

Parameters:

subset – Columns to check for NaN values. If None, all columns are considered.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
┌──────┬──────┬──────┐
│ ss   ┆ u    ┆ ff   │
│ ---  ┆ ---  ┆ ---  │
│ str  ┆ i64  ┆ f64  │
╞══════╪══════╪══════╡
│ A    ┆ 1    ┆ 1.1  │
│ B    ┆ 0    ┆ 0.0  │
│ A    ┆ 2    ┆ 2.2  │
│ B    ┆ 3    ┆ 3.3  │
│ B    ┆ 4    ┆ 4.4  │
│ C    ┆ 5    ┆ -1.1 │
│ C    ┆ 6    ┆ -2.2 │
│ C    ┆ 7    ┆ -3.3 │
│ D    ┆ 8    ┆ inf  │
│ F    ┆ 9    ┆ NaN  │
│ null ┆ null ┆ null │
└──────┴──────┴──────┘
>>>
>>> tf.drop_nans()
┌─────┬─────┬──────┐
│ ss  ┆ u   ┆ ff   │
│ --- ┆ --- ┆ ---  │
│ str ┆ i64 ┆ f64  │
╞═════╪═════╪══════╡
│ A   ┆ 1   ┆ 1.1  │
│ B   ┆ 0   ┆ 0.0  │
│ A   ┆ 2   ┆ 2.2  │
│ B   ┆ 3   ┆ 3.3  │
│ B   ┆ 4   ┆ 4.4  │
│ C   ┆ 5   ┆ -1.1 │
│ C   ┆ 6   ┆ -2.2 │
│ C   ┆ 7   ┆ -3.3 │
│ D   ┆ 8   ┆ inf  │
└─────┴─────┴──────┘
>>>
TableFrame.drop_nulls(
subset: td_typing.ColumnNameOrSelector | Collection[td_typing.ColumnNameOrSelector] | None = None,
) -> TableFrame
Categories:

manipulation

Drop rows with null values.

Parameters:

subset – Columns to evaluate for null values. If None, all columns are considered.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
┌──────┬──────┬──────┐
│ ss   ┆ u    ┆ ff   │
│ ---  ┆ ---  ┆ ---  │
│ str  ┆ i64  ┆ f64  │
╞══════╪══════╪══════╡
│ A    ┆ 1    ┆ 1.1  │
│ B    ┆ 0    ┆ 0.0  │
│ A    ┆ 2    ┆ 2.2  │
│ B    ┆ 3    ┆ 3.3  │
│ B    ┆ 4    ┆ 4.4  │
│ C    ┆ 5    ┆ -1.1 │
│ C    ┆ 6    ┆ -2.2 │
│ C    ┆ 7    ┆ -3.3 │
│ D    ┆ 8    ┆ inf  │
│ F    ┆ 9    ┆ NaN  │
│ G    ┆ null ┆ 2.3  │
└──────┴──────┴──────┘
>>>
>>> tf.drop_nulls("u")
>>>
┌─────┬─────┬──────┐
│ ss  ┆ u   ┆ ff   │
│ --- ┆ --- ┆ ---  │
│ str ┆ i64 ┆ f64  │
╞═════╪═════╪══════╡
│ A   ┆ 1   ┆ 1.1  │
│ B   ┆ 0   ┆ 0.0  │
│ A   ┆ 2   ┆ 2.2  │
│ B   ┆ 3   ┆ 3.3  │
│ B   ┆ 4   ┆ 4.4  │
│ C   ┆ 5   ┆ -1.1 │
│ C   ┆ 6   ┆ -2.2 │
│ C   ┆ 7   ┆ -3.3 │
│ D   ┆ 8   ┆ inf  │
│ F   ┆ 9   ┆ NaN  │
└─────┴─────┴──────┘
TableFrame.fill_nan(
value: int | float | Expr | None,
) -> TableFrame
Categories:

manipulation

Replace all NaN values in the TableFrame with the given value.

Parameters:

value – The value to replace NaN with.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
┌──────┬──────┐
│ x    ┆ y    │
│ ---  ┆ ---  │
│ f64  ┆ f64  │
╞══════╪══════╡
│ 1.0  ┆ 2.0  │
│ 2.0  ┆ 2.0  │
│ NaN  ┆ NaN  │
│ 4.0  ┆ NaN  │
│ 5.0  ┆ null │
│ null ┆ null │
└──────┴──────┘
>>>
>>> tf.fill_nan(10)
>>>
┌──────┬──────┐
│ x    ┆ y    │
│ ---  ┆ ---  │
│ f64  ┆ f64  │
╞══════╪══════╡
│ 1.0  ┆ 2.0  │
│ 2.0  ┆ 2.0  │
│ 10.0 ┆ 10.0 │
│ 4.0  ┆ 10.0 │
│ 5.0  ┆ null │
│ null ┆ null │
└──────┴──────┘
TableFrame.fill_null(
value: Any | Expr | None = None,
) -> TableFrame
Categories:

manipulation

Replace all null values in the TableFrame with the given value.

Parameters:

value – The value to replace null with.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
┌──────┬──────┐
│ x    ┆ y    │
│ ---  ┆ ---  │
│ f64  ┆ f64  │
╞══════╪══════╡
│ 1.0  ┆ 2.0  │
│ 2.0  ┆ 2.0  │
│ NaN  ┆ NaN  │
│ 4.0  ┆ NaN  │
│ 5.0  ┆ null │
│ null ┆ null │
└──────┴──────┘
>>>
>>> tf.fill_null(20)
>>>
┌──────┬──────┐
│ x    ┆ y    │
│ ---  ┆ ---  │
│ f64  ┆ f64  │
╞══════╪══════╡
│ 1.0  ┆ 2.0  │
│ 2.0  ┆ 2.0  │
│ NaN  ┆ NaN  │
│ 4.0  ┆ NaN  │
│ 5.0  ┆ 20.0 │
│ 20.0 ┆ 20.0 │
└──────┴──────┘
Expr.alias(
name: str,
) -> Expr
Categories:

manipulation

Set the name for a column or expression.

Parameters:

name – New name for the column or expression. The name must be a word ([A-Za-z_][A-Za-z0-9_]*) of up to 100 characters.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("age"), td.col("age").alias("Age"))
>>>
┌──────┬──────┐
│ age  ┆ Age  │
│ ---  ┆ ---  │
│ i64  ┆ i64  │
╞══════╪══════╡
│ 1    ┆ 1    │
│ 15   ┆ 15   │
│ 18   ┆ 18   │
│ 60   ┆ 60   │
│ 60   ┆ 60   │
│ 75   ┆ 75   │
│ null ┆ null │
└──────┴──────┘
Expr.fill_nan(
value: int | float | Expr | None,
) -> Expr
Categories:

manipulation

Replace NaN values with the given value.

Parameters:

value – The value to replace NaN values with.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf = tf.select(td.col("val"), td.col("val").fill_nan(5.5)
...        .alias("fill_nan"))
>>>
┌──────┬──────────┐
│ val  ┆ fill_nan │
│ ---  ┆ ---      │
│ f64  ┆ f64      │
╞══════╪══════════╡
│ 1.1  ┆ 1.1      │
│ 2.0  ┆ 2.0      │
│ inf  ┆ inf      │
│ null ┆ null     │
│ NaN  ┆ 5.5      │
└──────┴──────────┘
Expr.fill_null(
value: Any | Expr | None = None,
strategy: Literal['forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'] | None = None,
limit: int | None = None,
) -> Expr
Categories:

manipulation

Replace null values with the given value.

Parameters:
  • value – The value to replace null values with.

  • strategy – The strategy to use for filling null values.

  • limit – The maximum number of null values to replace.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf = tf.select(td.col("val"), td.col("val")
...        .fill_null(5.5).alias("fill_null"))
>>>
┌──────┬───────────┐
│ val  ┆ fill_null │
│ ---  ┆ ---       │
│ f64  ┆ f64       │
╞══════╪═══════════╡
│ -1.0 ┆ -1.0      │
│ 0.0  ┆ 0.0       │
│ 1.1  ┆ 1.1       │
│ 2.0  ┆ 2.0       │
│ inf  ┆ inf       │
│ null ┆ 5.5       │
│ NaN  ┆ NaN       │
└──────┴───────────┘