TableFrame#

TableFrame Expr#

TableFrame String#

class TdExprStringNameSpace(expr: ExprStringNameSpace)[source]#

Bases: object

contains(pattern: str | TdExpr, *, literal: bool = False, strict: bool = True) → TdExpr[source]#

Evaluate if the string contains a pattern.

Parameters:

pattern – The pattern to search for.
literal – Take the pattern as a literal string (not a regex).
strict – if the given pattern is not valid regex, raise an error.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.contains("ab").alias("contains"))
>>>
┌──────┬──────────┐
│ a    ┆ contains │
│ ---  ┆ ---      │
│ str  ┆ bool     │
╞══════╪══════════╡
│ a    ┆ false    │
│ ab   ┆ true     │
│ b    ┆ false    │
│ xaby ┆ true     │
│ null ┆ null     │
└──────┴──────────┘

contains_any(patterns: td_expr.IntoTdExpr, *, ascii_case_insensitive: bool = False) → td_expr.TdExpr[source]#

Evaluate if the string contains any of the given patterns.

Parameters:

patterns – The patterns to search for.
ascii_case_insensitive – If true, the search is case-insensitive.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.contains_any(["a", "b"]).alias("contains_any"))
>>>
┌──────┬──────────────┐
│ a    ┆ contains_any │
│ ---  ┆ ---          │
│ str  ┆ bool         │
╞══════╪══════════════╡
│ abc  ┆ true         │
│ axy  ┆ true         │
│ xyb  ┆ true         │
│ xyz  ┆ false        │
│ null ┆ null         │
└──────┴──────────────┘

count_matches(pattern: str | TdExpr, *, literal: bool = False) → TdExpr[source]#

Count the occurrences of the given pattern in the string.

Parameters:

pattern – The pattern to count.
literal – Take the pattern as a literal string (not a regex).

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.count_matches("b.").alias("count_matches"))
>>>
┌───────────┬───────────────┐
│ a         ┆ count_matches │
│ ---       ┆ ---           │
│ str       ┆ u32           │
╞═══════════╪═══════════════╡
│ a bAb c d ┆ 2             │
│ bCbb c d  ┆ 2             │
│ bb        ┆ 1             │
│ b         ┆ 0             │
│ a         ┆ 0             │
│ null      ┆ null          │
└───────────┴───────────────┘

ends_with(suffix: str | TdExpr) → TdExpr[source]#

Evaluate if the string ends with the given suffix.

Parameters: suffix – The suffix to search for.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.ends_with("b").alias("ends_with"))
>>>
┌──────┬───────────┐
│ a    ┆ ends_with │
│ ---  ┆ ---       │
│ str  ┆ bool      │
╞══════╪═══════════╡
│ a    ┆ false     │
│ ab   ┆ true      │
│ b    ┆ true      │
│ xaby ┆ false     │
│ null ┆ null      │
└──────┴───────────┘

extract(pattern: TdExpr | Series | str, group_index: int = 1) → TdExpr[source]#

Extract a pattern from the string.

Parameters:

pattern – The pattern to extract.
group_index – The group index to extract.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.extract("(b.b)", 1).alias("extract"))
>>>
┌───────────┬─────────┐
│ a         ┆ extract │
│ ---       ┆ ---     │
│ str       ┆ str     │
╞═══════════╪═════════╡
│ a bAb c d ┆ bAb     │
│ bCbb c d  ┆ bCb     │
│ bb        ┆ null    │
│ null      ┆ null    │
└───────────┴─────────┘

find(pattern: str | TdExpr, *, literal: bool = False, strict: bool = True) → TdExpr[source]#

Find the position of the first occurrence of the given pattern.

Parameters:

pattern – The pattern to search for.
literal – Take the pattern as a literal string (not a regex).
strict – if the given pattern is not valid regex, raise an error.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.find("b").alias("find"))
>>>
┌──────┬──────┐
│ a    ┆ find │
│ ---  ┆ ---  │
│ str  ┆ u32  │
╞══════╪══════╡
│ a    ┆ null │
│ ab   ┆ 1    │
│ b    ┆ 0    │
│ xaby ┆ 2    │
│ null ┆ null │
└──────┴──────┘

head(n: int | TdExpr | Series | str) → TdExpr[source]#

Extract the start of the string up to the given length.

Parameters: n – The length of the head.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.head(2).alias("head"))
>>>
┌──────┬──────┐
│ a    ┆ head │
│ ---  ┆ ---  │
│ str  ┆ str  │
╞══════╪══════╡
│ abc  ┆ ab   │
│ a    ┆ a    │
│ null ┆ null │
└──────┴──────┘

len_bytes() → TdExpr[source]#

Return number of bytes (not chars) of a string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.len_bytes().alias("len_bytes"))
>>>
┌──────┬────────────┐
│ a    ┆ len_bytes  │
│ ---  ┆ ---        │
│ str  ┆ u32        │
╞══════╪════════════╡
│ ab   ┆ 2          │
│ 再   ┆ 3          │
│ null ┆ null       │
└──────┴────────────┘

len_chars() → TdExpr[source]#

Return number of chars (not bytes) of a string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.len_chars().alias("len_chars"))
>>>
┌──────┬────────────┐
│ a    ┆ len_chars  │
│ ---  ┆ ---        │
│ str  ┆ u32        │
╞══════╪════════════╡
│ ab   ┆ 2          │
│ 再   ┆ 1          │
│ null ┆ null       │
└──────┴────────────┘

pad_end(length: int, fill_char: str = ' ') → TdExpr[source]#

Pad string values at the end to the given length using the given fill character.

Parameters:

length – The length to end pad the string to.
fill_char – The character to use for padding.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.pad_end(6, "-").alias("pad_end"))
>>>
┌────────┬─────────┐
│ a      ┆ pad_end │
│ ---    ┆ ---     │
│ str    ┆ str     │
╞════════╪═════════╡
│ abc    ┆ abc---  │
│    def ┆    def  │
│ null   ┆ null    │
└────────┴─────────┘

pad_start(length: int, fill_char: str = ' ') → TdExpr[source]#

Pad string values at the front to the given length using the given fill character.

Parameters:

length – The length to front pad the string to.
fill_char – The character to use for padding.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.pad_start(6, "-").alias("pad_start"))
>>>
┌────────┬───────────┐
│ a      ┆ pad_start │
│ ---    ┆ ---       │
│ str    ┆ str       │
╞════════╪═══════════╡
│ abc    ┆ ---abc    │
│    def ┆    def    │
│ null   ┆ null      │
└────────┴───────────┘

replace(pattern: str | TdExpr, value: str | TdExpr, *, literal: bool = False, n: int = 1) → TdExpr[source]#

Replace the first occurrence of a pattern with the given string.

Parameters:

pattern – The pattern to replace.
value – The value to replace the pattern with.
literal – Take the pattern as a literal string (not a regex).
n – Number of matches to replace.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.replace("b", "X").alias("replace"))
>>>
┌───────────┬───────────┐
│ a         ┆ replace   │
│ ---       ┆ ---       │
│ str       ┆ str       │
╞═══════════╪═══════════╡
│ a bAb c d ┆ a XAb c d │
│ bCbb c d  ┆ XCbb c d  │
│ bb        ┆ Xb        │
│ b         ┆ X         │
│ a         ┆ a         │
│ null      ┆ null      │
└───────────┴───────────┘

replace_all(pattern: str | TdExpr, value: str | TdExpr, *, literal: bool = False) → TdExpr[source]#

Replace all occurrences of a pattern with the given string.

Parameters:

pattern – The pattern to replace.
value – The value to replace the pattern with.
literal – Take the pattern as a literal string (not a regex).

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.replace_all("b", "X").alias("replace_all"))
>>>
┌───────────┬─────────────┐
│ a         ┆ replace_all │
│ ---       ┆ ---         │
│ str       ┆ str         │
╞═══════════╪═════════════╡
│ a bAb c d ┆ a XAX c d   │
│ bCbb c d  ┆ XCXX c d    │
│ bb        ┆ XX          │
│ b         ┆ X           │
│ a         ┆ a           │
│ null      ┆ null        │
└───────────┴─────────────┘

replace_many(patterns: td_expr.IntoTdExpr | Mapping[str, str], replace_with: td_expr.IntoTdExpr | NoDefault = <no_default>, *, ascii_case_insensitive: bool = False) → td_expr.TdExpr[source]#

Replace all occurrences of any of the given patterns with the given string.

Parameters:

patterns – The patterns to replace.
replace_with – The value to replace the pattern with.
ascii_case_insensitive – If true, the search is case-insensitive.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.replace_many(["a", "b"], "X").alias("replace_many"))
>>>
┌──────┬──────────────┐
│ a    ┆ replace_many │
│ ---  ┆ ---          │
│ str  ┆ str          │
╞══════╪══════════════╡
│ abc  ┆ XXc          │
│ axy  ┆ Xxy          │
│ xyb  ┆ xyX          │
│ xyz  ┆ xyz          │
│ null ┆ null         │
└──────┴──────────────┘

reverse() → TdExpr[source]#

Reverse the string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.reverse().alias("reverse"))
>>>
┌──────┬─────────┐
│ a    ┆ reverse │
│ ---  ┆ ---     │
│ str  ┆ str     │
╞══════╪═════════╡
│ abc  ┆ cba     │
│ a    ┆ a       │
│ null ┆ null    │
└──────┴─────────┘

slice(offset, length=None) → TdExpr[source]#

Extract the substring at the given offset for the given length.

Parameters:

offset – The offset to start the slice.
length – The length of the slice. If None, slice until the end of the string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.slice(1,1).alias("slice"))
>>>
┌──────┬───────┐
│ a    ┆ slice │
│ ---  ┆ ---   │
│ str  ┆ str   │
╞══════╪═══════╡
│ abc  ┆ b     │
│ a    ┆       │
│ null ┆ null  │
└──────┴───────┘

starts_with(prefix: str | TdExpr) → TdExpr[source]#

Evaluate if the string starts with the given prefix.

Parameters: prefix – The prefix to search for.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.starts_with("a").alias("starts_with"))
>>>
┌──────┬─────────────┐
│ a    ┆ starts_with │
│ ---  ┆ ---         │
│ str  ┆ bool        │
╞══════╪═════════════╡
│ a    ┆ true        │
│ ab   ┆ true        │
│ b    ┆ false       │
│ xaby ┆ false       │
│ null ┆ null        │
└──────┴─────────────┘

strip_chars(characters: td_expr.IntoTdExpr = None) → td_expr.TdExpr[source]#

Trim string values.

Parameters: characters – Characters to trim from the start and end of the string. All characters in the given string are removed, regardless of their order. Default is whitespace.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.strip_chars("a ").alias("strip_chars"))
>>>
┌─────────────────────────────────┬─────────────┐
│ a                               ┆ strip_chars │
│ ---                             ┆ ---         │
│ str                             ┆ str         │
╞═════════════════════════════════╪═════════════╡
│ acba cda                      … ┆ cba cd      │
│    xy z                         ┆ xy z        │
│ null                            ┆ null        │
└─────────────────────────────────┴─────────────┘

strip_chars_end(characters: td_expr.IntoTdExpr = None) → td_expr.TdExpr[source]#

Trim string values from the end of the string.

Parameters: characters – Characters to trim from the end of the string. All ending characters in the given string are removed, regardless of their order. Default is whitespace.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.strip_chars_end("dc ").alias("strip_chars_end"))
>>>
┌───────────────────────────────┬─────────────────┐
│ a                             ┆ strip_chars_end │
│ ---                           ┆ ---             │
│ str                           ┆ str             │
╞═══════════════════════════════╪═════════════════╡
│ cba cd                        ┆ cba             │
│    xy z                       ┆    xy z         │
│ null                          ┆ null            │
└───────────────────────────────┴─────────────────┘

strip_chars_start(characters: td_expr.IntoTdExpr = None) → td_expr.TdExpr[source]#

Trim string values from the start of the string.

Parameters: characters – Characters to trim from the start of the string. All starting characters in the given string are removed, regardless of their order. Default is whitespace.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.strip_chars_start("abc").alias("strip_chars_start"))
>>>
┌───────────────────────────────┬────────────────────────────┐
│ a                             ┆ strip_chars_start          │
│ ---                           ┆ ---                        │
│ str                           ┆ str                        │
╞═══════════════════════════════╪════════════════════════════╡
│ cba cd                        ┆  cd                        │
│    xy z                       ┆    xy z                    │
│ null                          ┆ null                       │
└───────────────────────────────┴────────────────────────────┘

strip_prefix(prefix: td_expr.IntoTdExpr) → td_expr.TdExpr[source]#

Trim string values by removing the given prefix.

Parameters: prefix – Prefix to remove from the string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.strip_prefix("cb").alias("strip_prefix"))
>>>
┌───────────────────────────────┬─────────────────┐
│ a                             ┆ strip_prefix    │
│ ---                           ┆ ---             │
│ str                           ┆ str             │
╞═══════════════════════════════╪═════════════════╡
│ cba cd                        ┆ a cd            │
│ bx                            ┆ bx              │
│ null                          ┆ null            │
└───────────────────────────────┴─────────────────┘

strip_suffix(suffix: td_expr.IntoTdExpr) → td_expr.TdExpr[source]#

Trim string values by removing the given suffix.

Parameters: suffix – Suffix to remove from the string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.strip_suffix("cd").alias("strip_suffix"))
>>>
┌───────────────────────────────┬─────────────────┐
│ a                             ┆ strip_suffix    │
│ ---                           ┆ ---             │
│ str                           ┆ str             │
╞═══════════════════════════════╪═════════════════╡
│ cba cd                        ┆ cba             │
│ bx                            ┆ bx              │
│ null                          ┆ null            │
└───────────────────────────────┴─────────────────┘

tail(n: int | TdExpr | Series | str) → TdExpr[source]#

Extract the end of the string up to the given length.

Parameters: n – The length of the tail.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.tail(2).alias("tail"))
>>>
┌──────┬──────┐
│ a    ┆ tail │
│ ---  ┆ ---  │
│ str  ┆ str  │
╞══════╪══════╡
│ abc  ┆ bc   │
│ a    ┆ a    │
│ null ┆ null │
└──────┴──────┘

to_date(fmt: str | None = None, *, strict: bool = True) → TdExpr[source]#

Convert the string to a date.

Parameters:

fmt – The date format string (default %Y-%m-%d). See the supported [formats](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html).
strict – Whether to parse the date strictly.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.to_date().alias("to_date"))
>>>
┌────────────┬────────────┐
│ a          ┆ to_date    │
│ ---        ┆ ---        │
│ str        ┆ date       │
╞════════════╪════════════╡
│ 2024-12-13 ┆ 2024-12-13 │
│ 2024-12-15 ┆ 2024-12-15 │
│ null       ┆ null       │
└────────────┴────────────┘

to_datetime(fmt: str | None = None, *, time_unit: Literal['ns', 'us', 'ms'] | None = None, time_zone: str | None = None, strict: bool = True, ambiguous: Literal['earliest', 'latest', 'raise', 'null'] | TdExpr = 'raise') → TdExpr[source]#

Convert the string to a datetime.

Parameters:

fmt – The datetime format string (default %Y-%m-%d %H:%M:%S). See the supported [formats](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html).
time_unit – {None, ‘us’, ‘ns’, ‘ms’}. If None (default), it is inferred from the format string.
time_zone – Time zone for the resulting value.
strict – If the conversion fails an error will be raised.
ambiguous – Policy to apply on ambiguous datetimes: ‘raise’: raises an error; ‘earliest’: use the earliest datetime; ‘latest’: use the latest datetime; ‘null’: set to null.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.to_datetime().alias("to_datetime"))
>>>
┌─────────────────────┬─────────────────────┐
│ a                   ┆ to_datetime         │
│ ---                 ┆ ---                 │
│ str                 ┆ datetime[μs]        │
╞═════════════════════╪═════════════════════╡
│ 2024-12-13 08:45:34 ┆ 2024-12-13 08:45:34 │
│ 2024-12-15 18:33:00 ┆ 2024-12-15 18:33:00 │
│ null                ┆ null                │
└─────────────────────┴─────────────────────┘

to_integer(*, base: int | TdExpr | Series | str = 10, strict: bool = True) → TdExpr[source]#

Convert a string to an integer.

Parameters:

base – The base of the integer.
strict – If true, raise an error if the string is not a valid integer.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.to_integer(strict=False).alias("to_integer"))
>>>
┌──────┬────────────┐
│ a    ┆ to_integer │
│ ---  ┆ ---        │
│ str  ┆ i64        │
╞══════╪════════════╡
│ 1    ┆ 1          │
│ 2.2  ┆ null       │
│ a    ┆ null       │
│ null ┆ null       │
└──────┴────────────┘

to_lowercase() → TdExpr[source]#

Return the lowercase of a string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.to_lowercase().alias("to_lowercase"))
>>>
┌──────┬───────────────┐
│ a    ┆ to_lowercase  │
│ ---  ┆ ---           │
│ str  ┆ str           │
╞══════╪═══════════════╡
│ aB   ┆ ab            │
│ null ┆ null          │
└──────┴───────────────┘

to_time(fmt: str | None = None, *, strict: bool = True, cache: bool = True) → TdExpr[source]#

Convert the string to a time.

Parameters:

fmt – The time format string (default %H:%M:%S). See the supported [formats](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html).
strict – Whether to parse the time strictly.
cache – Whether to cache the time.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.to_time().alias("to_time"))
>>>
┌──────────┬──────────┐
│ a        ┆ to_time  │
│ ---      ┆ ---      │
│ str      ┆ time     │
╞══════════╪══════════╡
│ 08:45:34 ┆ 08:45:34 │
│ 18:33:00 ┆ 18:33:00 │
│ null     ┆ null     │
└──────────┴──────────┘

to_titlecase() → TdExpr[source]#

Uppercase the first character of a string and lowercase all the other ones.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.to_titlecase().alias("titlecase"))
>>>
┌──────┬───────────┐
│ a    ┆ titlecase │
│ ---  ┆ ---       │
│ str  ┆ str       │
╞══════╪═══════════╡
│ ab   ┆ Ab        │
│ Ab   ┆ Ab        │
│ AB   ┆ Ab        │
│ aB   ┆ Ab        │
│ null ┆ null      │
└──────┴───────────┘

to_uppercase() → TdExpr[source]#

Return the uppercase of a string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.to_uppercase().alias("to_uppercase"))
>>>
┌──────┬──────────────┐
│ a    ┆ to_uppercase │
│ ---  ┆ ---          │
│ str  ┆ str          │
╞══════╪══════════════╡
│ aB   ┆ AB           │
│ null ┆ null         │
└──────┴──────────────┘

zfill(length: int | TdExpr | Series | str) → TdExpr[source]#

Pad numeric string values at the start to the given length using zeros.

Parameters: length – The length to pad the string to.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.zfill(2).alias("zfill"))
>>>
┌──────┬───────┐
│ a    ┆ zfill │
│ ---  ┆ ---   │
│ str  ┆ str   │
╞══════╪═══════╡
│ 0    ┆ 00    │
│ 1    ┆ 01    │
│ 1000 ┆ 1000  │
│ null ┆ null  │
└──────┴───────┘

to_tdexpr(expr: Expr) → TdExpr[source]#