TableFrame#

TableFrame Expr#

TableFrame String#

class TdExprStringNameSpace(expr: ExprStringNameSpace)[source]#

Bases: object

contains(pattern: str | TdExpr, *, literal: bool = False, strict: bool = True) TdExpr[source]#

Evaluate if the string contains a pattern.

Parameters:
  • pattern – The pattern to search for.

  • literal – Take the pattern as a literal string (not a regex).

  • strict – if the given pattern is not valid regex, raise an error.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.contains("ab").alias("contains"))
>>>
┌──────┬──────────┐
│ a    ┆ contains │
│ ---  ┆ ---      │
│ str  ┆ bool     │
╞══════╪══════════╡
│ a    ┆ false    │
│ ab   ┆ true     │
│ b    ┆ false    │
│ xaby ┆ true     │
│ null ┆ null     │
└──────┴──────────┘
contains_any(patterns: td_expr.IntoTdExpr, *, ascii_case_insensitive: bool = False) td_expr.TdExpr[source]#

Evaluate if the string contains any of the given patterns.

Parameters:
  • patterns – The patterns to search for.

  • ascii_case_insensitive – If true, the search is case-insensitive.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.contains_any(["a", "b"]).alias("contains_any"))
>>>
┌──────┬──────────────┐
│ a    ┆ contains_any │
│ ---  ┆ ---          │
│ str  ┆ bool         │
╞══════╪══════════════╡
│ abc  ┆ true         │
│ axy  ┆ true         │
│ xyb  ┆ true         │
│ xyz  ┆ false        │
│ null ┆ null         │
└──────┴──────────────┘
count_matches(pattern: str | TdExpr, *, literal: bool = False) TdExpr[source]#

Counts the occurrences of the given pattern in the string.

Parameters:
  • pattern – The pattern to count occurrences of.

  • literal – Take the pattern as a literal string (not a regex).

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.count_matches("b.").alias("count_matches"))
>>>
┌───────────┬───────────────┐
│ a         ┆ count_matches │
│ ---       ┆ ---           │
│ str       ┆ u32           │
╞═══════════╪═══════════════╡
│ a bAb c d ┆ 2             │
│ bCbb c d  ┆ 2             │
│ bb        ┆ 1             │
│ b         ┆ 0             │
│ a         ┆ 0             │
│ null      ┆ null          │
└───────────┴───────────────┘
ends_with(suffix: str | TdExpr) TdExpr[source]#

Evaluate if the string ends with the given suffix.

Parameters:

suffix – The suffix to search for.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.ends_with("b").alias("ends_with"))
>>>
┌──────┬───────────┐
│ a    ┆ ends_with │
│ ---  ┆ ---       │
│ str  ┆ bool      │
╞══════╪═══════════╡
│ a    ┆ false     │
│ ab   ┆ true      │
│ b    ┆ true      │
│ xaby ┆ false     │
│ null ┆ null      │
└──────┴───────────┘
extract(pattern: TdExpr | Series | str, group_index: int = 1) TdExpr[source]#

Extract a pattern from the string.

Parameters:
  • pattern – The pattern to extract.

  • group_index – The group index to extract.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.extract("(b.b)", 1).alias("extract"))
>>>
┌───────────┬─────────┐
│ a         ┆ extract │
│ ---       ┆ ---     │
│ str       ┆ str     │
╞═══════════╪═════════╡
│ a bAb c d ┆ bAb     │
│ bCbb c d  ┆ bCb     │
│ bb        ┆ null    │
│ null      ┆ null    │
└───────────┴─────────┘
find(pattern: str | TdExpr, *, literal: bool = False, strict: bool = True) TdExpr[source]#

Find the position of the first occurrence of the given pattern.

Parameters:
  • pattern – The pattern to search for.

  • literal – Take the pattern as a literal string (not a regex).

  • strict – if the given pattern is not valid regex, raise an error.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.find("b").alias("find"))
>>>
┌──────┬──────┐
│ a    ┆ find │
│ ---  ┆ ---  │
│ str  ┆ u32  │
╞══════╪══════╡
│ a    ┆ null │
│ ab   ┆ 1    │
│ b    ┆ 0    │
│ xaby ┆ 2    │
│ null ┆ null │
└──────┴──────┘
head(n: int | TdExpr | Series | str) TdExpr[source]#

Extract the start of the string up to the given length.

Parameters:

n – The length of the head.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.head(2).alias("head"))
>>>
┌──────┬──────┐
│ a    ┆ head │
│ ---  ┆ ---  │
│ str  ┆ str  │
╞══════╪══════╡
│ abc  ┆ ab   │
│ a    ┆ a    │
│ null ┆ null │
└──────┴──────┘
len_bytes() TdExpr[source]#

Return number of bytes (not chars) of a string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.len_bytes().alias("len_bytes"))
>>>
┌──────┬────────────┐
│ a    ┆ len_bytes  │
│ ---  ┆ ---        │
│ str  ┆ u32        │
╞══════╪════════════╡
│ ab   ┆ 2          │
│ 再   ┆ 3          │
│ null ┆ null       │
└──────┴────────────┘
len_chars() TdExpr[source]#

Return number of chars (not bytes) of a string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.len_chars().alias("len_chars"))
>>>
┌──────┬────────────┐
│ a    ┆ len_chars  │
│ ---  ┆ ---        │
│ str  ┆ u32        │
╞══════╪════════════╡
│ ab   ┆ 2          │
│ 再   ┆ 1          │
│ null ┆ null       │
└──────┴────────────┘
pad_end(length: int, fill_char: str = ' ') TdExpr[source]#

Pad string values at the end to the given length using the given fill character.

Parameters:
  • length – The length to end pad the string to.

  • fill_char – The character to use for padding.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.pad_end(6, "-").alias("pad_end"))
>>>
┌────────┬─────────┐
│ a      ┆ pad_end │
│ ---    ┆ ---     │
│ str    ┆ str     │
╞════════╪═════════╡
│ abc    ┆ abc---  │
│    def ┆    def  │
│ null   ┆ null    │
└────────┴─────────┘
pad_start(length: int, fill_char: str = ' ') TdExpr[source]#

Pad string values at the front to the given length using the given fill character.

Parameters:
  • length – The length to front pad the string to.

  • fill_char – The character to use for padding.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.pad_start(6, "-").alias("pad_start"))
>>>
┌────────┬───────────┐
│ a      ┆ pad_start │
│ ---    ┆ ---       │
│ str    ┆ str       │
╞════════╪═══════════╡
│ abc    ┆ ---abc    │
│    def ┆    def    │
│ null   ┆ null      │
└────────┴───────────┘
replace(pattern: str | TdExpr, value: str | TdExpr, *, literal: bool = False, n: int = 1) TdExpr[source]#

Replace the first occurrence of a pattern with the given string.

Parameters:
  • pattern – The pattern to replace.

  • value – The value to replace the pattern with.

  • literal – Take the pattern as a literal string (not a regex).

  • n – Number of matches to replace.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.replace("b", "X").alias("replace"))
>>>
┌───────────┬───────────┐
│ a         ┆ replace   │
│ ---       ┆ ---       │
│ str       ┆ str       │
╞═══════════╪═══════════╡
│ a bAb c d ┆ a XAb c d │
│ bCbb c d  ┆ XCbb c d  │
│ bb        ┆ Xb        │
│ b         ┆ X         │
│ a         ┆ a         │
│ null      ┆ null      │
└───────────┴───────────┘
replace_all(pattern: str | TdExpr, value: str | TdExpr, *, literal: bool = False) TdExpr[source]#

Replace all occurrences of a pattern with the given string.

Parameters:
  • pattern – The pattern to replace.

  • value – The value to replace the pattern with.

  • literal – Take the pattern as a literal string (not a regex).

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.replace_all("b", "X").alias("replace_all"))
>>>
┌───────────┬─────────────┐
│ a         ┆ replace_all │
│ ---       ┆ ---         │
│ str       ┆ str         │
╞═══════════╪═════════════╡
│ a bAb c d ┆ a XAX c d   │
│ bCbb c d  ┆ XCXX c d    │
│ bb        ┆ XX          │
│ b         ┆ X           │
│ a         ┆ a           │
│ null      ┆ null        │
└───────────┴─────────────┘
replace_many(patterns: td_expr.IntoTdExpr | Mapping[str, str], replace_with: td_expr.IntoTdExpr | NoDefault = <no_default>, *, ascii_case_insensitive: bool = False) td_expr.TdExpr[source]#

Replace all occurrences of any of the given patterns with the given string.

Parameters:
  • patterns – The patterns to replace.

  • replace_with – The value to replace the pattern with.

  • ascii_case_insensitive – If true, the search is case-insensitive.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.replace_many(["a", "b"], "X").alias("replace_many"))
>>>
┌──────┬──────────────┐
│ a    ┆ replace_many │
│ ---  ┆ ---          │
│ str  ┆ str          │
╞══════╪══════════════╡
│ abc  ┆ XXc          │
│ axy  ┆ Xxy          │
│ xyb  ┆ xyX          │
│ xyz  ┆ xyz          │
│ null ┆ null         │
└──────┴──────────────┘
reverse() TdExpr[source]#

Reverse the string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.reverse().alias("reverse"))
>>>
┌──────┬─────────┐
│ a    ┆ reverse │
│ ---  ┆ ---     │
│ str  ┆ str     │
╞══════╪═════════╡
│ abc  ┆ cba     │
│ a    ┆ a       │
│ null ┆ null    │
└──────┴─────────┘
slice(offset: int | TdExpr | Series | str, length: int | TdExpr | Series | str | None = None) TdExpr[source]#

Extract the substring at the given offset for the given length.

Parameters:
  • offset – The offset to start the slice.

  • length – The length of the slice. If None, slice until the end of the string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.slice(1,1).alias("slice"))
>>>
┌──────┬───────┐
│ a    ┆ slice │
│ ---  ┆ ---   │
│ str  ┆ str   │
╞══════╪═══════╡
│ abc  ┆ b     │
│ a    ┆       │
│ null ┆ null  │
└──────┴───────┘
starts_with(prefix: str | TdExpr) TdExpr[source]#

Evaluate if the string starts with the given prefix.

Parameters:

prefix – The prefix to search for.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.starts_with("a").alias("starts_with"))
>>>
┌──────┬─────────────┐
│ a    ┆ starts_with │
│ ---  ┆ ---         │
│ str  ┆ bool        │
╞══════╪═════════════╡
│ a    ┆ true        │
│ ab   ┆ true        │
│ b    ┆ false       │
│ xaby ┆ false       │
│ null ┆ null        │
└──────┴─────────────┘
strip_chars(characters: td_expr.IntoTdExpr = None) td_expr.TdExpr[source]#

Trim string values.

Parameters:

characters – Characters to trim from the start and end of the string. All characters in the given string are removed, regardless of their order. Default is whitespace.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.strip_chars("a ").alias("strip_chars"))
>>>
┌─────────────────────────────────┬─────────────┐
│ a                               ┆ strip_chars │
│ ---                             ┆ ---         │
│ str                             ┆ str         │
╞═════════════════════════════════╪═════════════╡
│ acba cda                      … ┆ cba cd      │
│    xy z                         ┆ xy z        │
│ null                            ┆ null        │
└─────────────────────────────────┴─────────────┘
strip_chars_end(characters: td_expr.IntoTdExpr = None) td_expr.TdExpr[source]#

Trim string values from the end of the string.

Parameters:

characters – Characters to trim from the end of the string. All ending characters in the given string are removed, regardless of their order. Default is whitespace.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.strip_chars_end("dc ").alias("strip_chars_end"))
>>>
┌───────────────────────────────┬─────────────────┐
│ a                             ┆ strip_chars_end │
│ ---                           ┆ ---             │
│ str                           ┆ str             │
╞═══════════════════════════════╪═════════════════╡
│ cba cd                        ┆ cba             │
│    xy z                       ┆    xy z         │
│ null                          ┆ null            │
└───────────────────────────────┴─────────────────┘
strip_chars_start(characters: td_expr.IntoTdExpr = None) td_expr.TdExpr[source]#

Trim string values from the start of the string.

Parameters:

characters – Characters to trim from the start of the string. All starting characters in the given string are removed, regardless of their order. Default is whitespace.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.strip_chars_start("abc").alias("strip_chars_start"))
>>>
┌───────────────────────────────┬────────────────────────────┐
│ a                             ┆ strip_chars_start          │
│ ---                           ┆ ---                        │
│ str                           ┆ str                        │
╞═══════════════════════════════╪════════════════════════════╡
│ cba cd                        ┆  cd                        │
│    xy z                       ┆    xy z                    │
│ null                          ┆ null                       │
└───────────────────────────────┴────────────────────────────┘
strip_prefix(prefix: td_expr.IntoTdExpr) td_expr.TdExpr[source]#

Trim string values removing the given prefix

Parameters:

prefix – Prefix to remove from the string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.strip_prefix("cb").alias("strip_prefix"))
>>>
┌───────────────────────────────┬─────────────────┐
│ a                             ┆ strip_prefix    │
│ ---                           ┆ ---             │
│ str                           ┆ str             │
╞═══════════════════════════════╪═════════════════╡
│ cba cd                        ┆ a cd            │
│ bx                            ┆ bx              │
│ null                          ┆ null            │
└───────────────────────────────┴─────────────────┘
strip_suffix(suffix: td_expr.IntoTdExpr) td_expr.TdExpr[source]#

Trim string values removing the given suffix

Parameters:

suffix – Suffix to remove from the string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.strip_suffix("cd").alias("strip_suffix"))
>>>
┌───────────────────────────────┬─────────────────┐
│ a                             ┆ strip_suffix    │
│ ---                           ┆ ---             │
│ str                           ┆ str             │
╞═══════════════════════════════╪═════════════════╡
│ cba cd                        ┆ cba             │
│ bx                            ┆ bx              │
│ null                          ┆ null            │
└───────────────────────────────┴─────────────────┘
tail(n: int | TdExpr | Series | str) TdExpr[source]#

Extract the end of the string up to the given length.

Parameters:

n – The length of the tail.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.tail(2).alias("tail"))
>>>
┌──────┬──────┐
│ a    ┆ tail │
│ ---  ┆ ---  │
│ str  ┆ str  │
╞══════╪══════╡
│ abc  ┆ bc   │
│ a    ┆ a    │
│ null ┆ null │
└──────┴──────┘
to_date(fmt: str | None = None, *, strict: bool = True) TdExpr[source]#

Convert the string to a date.

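Parameters:
  • fmt – The date format string. If None (default), the format is inferred from the data.

  • strict – If the conversion fails an error will be raised.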

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.to_date().alias("to_date"))
>>>
┌────────────┬────────────┐
│ a          ┆ to_date    │
│ ---        ┆ ---        │
│ str        ┆ date       │
╞════════════╪════════════╡
│ 2024-12-13 ┆ 2024-12-13 │
│ 2024-12-15 ┆ 2024-12-15 │
│ null       ┆ null       │
└────────────┴────────────┘
to_datetime(fmt: str | None = None, *, time_unit: Literal['ns', 'us', 'ms'] | None = None, time_zone: str | None = None, strict: bool = True, ambiguous: Literal['earliest', 'latest', 'raise', 'null'] | TdExpr = 'raise') TdExpr[source]#

Convert the string to a datetime.

Parameters:
  • fmt – The datetime format string (default %Y-%m-%d %H:%M:%S). See [formats](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html).

  • time_unit – {None, ‘us’, ‘ns’, ‘ms’} If None (default), it is inferred from the format string.

  • time_zone – Time zone for the resulting value.

  • strict – If the conversion fails an error will be raised.

  • ambiguous – Policy to apply on ambiguous datetimes: ‘raise’: raises an error; ‘earliest’: use the earliest datetime; ‘latest’: use the latest datetime; ‘null’: set to null.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.to_datetime().alias("to_datetime"))
>>>
┌─────────────────────┬─────────────────────┐
│ a                   ┆ to_datetime         │
│ ---                 ┆ ---                 │
│ str                 ┆ datetime[μs]        │
╞═════════════════════╪═════════════════════╡
│ 2024-12-13 08:45:34 ┆ 2024-12-13 08:45:34 │
│ 2024-12-15 18:33:00 ┆ 2024-12-15 18:33:00 │
│ null                ┆ null                │
└─────────────────────┴─────────────────────┘
to_integer(*, base: int | TdExpr | Series | str = 10, strict: bool = True) TdExpr[source]#

Convert a string to an integer.

Parameters:
  • base – The base of the integer.

  • strict – If true, raise an error if the string is not a valid integer.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a")
      .str.to_integer(strict=False).alias("to_integer"))
>>>
┌──────┬────────────┐
│ a    ┆ to_integer │
│ ---  ┆ ---        │
│ str  ┆ i64        │
╞══════╪════════════╡
│ 1    ┆ 1          │
│ 2.2  ┆ null       │
│ a    ┆ null       │
│ null ┆ null       │
└──────┴────────────┘
to_lowercase() TdExpr[source]#

Return the lowercase of a string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.to_lowercase().alias("to_lowercase"))
>>>
┌──────┬───────────────┐
│ a    ┆ to_lowercase  │
│ ---  ┆ ---           │
│ str  ┆ str           │
╞══════╪═══════════════╡
│ aB   ┆ ab            │
│ null ┆ null          │
└──────┴───────────────┘
to_time(fmt: str | None = None, *, strict: bool = True, cache: bool = True) TdExpr[source]#

Convert the string to a time.

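Parameters:
  • fmt – The time format string. If None (default), the format is inferred from the data.

  • strict – If the conversion fails an error will be raised.

  • cache – Use a cache of unique, converted times to apply the conversion.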

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.to_time().alias("to_time"))
>>>
┌──────────┬──────────┐
│ a        ┆ to_time  │
│ ---      ┆ ---      │
│ str      ┆ time     │
╞══════════╪══════════╡
│ 08:45:34 ┆ 08:45:34 │
│ 18:33:00 ┆ 18:33:00 │
│ null     ┆ null     │
└──────────┴──────────┘
to_titlecase() TdExpr[source]#

Uppercase the first character and lowercase all the other ones of a string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.to_titlecase().alias("titlecase"))
>>>
┌──────┬───────────┐
│ a    ┆ titlecase │
│ ---  ┆ ---       │
│ str  ┆ str       │
╞══════╪═══════════╡
│ ab   ┆ Ab        │
│ Ab   ┆ Ab        │
│ AB   ┆ Ab        │
│ aB   ┆ Ab        │
│ null ┆ null      │
└──────┴───────────┘
to_uppercase() TdExpr[source]#

Return the uppercase of a string.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.to_uppercase().alias("to_uppercase"))
>>>
┌──────┬──────────────┐
│ a    ┆ to_uppercase │
│ ---  ┆ ---          │
│ str  ┆ str          │
╞══════╪══════════════╡
│ aB   ┆ AB           │
│ null ┆ null         │
└──────┴──────────────┘
zfill(length: int | TdExpr | Series | str) TdExpr[source]#

Pad numeric string values at the start to the given length using zeros.

Parameters:

length – The length to front pad the string to.

Example:

>>> import tabsdata as td
>>>
>>> tf: td.TableFrame ...
>>>
>>> tf.select(td.col("a"), td.col("a").str.zfill(2).alias("zfill"))
>>>
┌──────┬───────┐
│ a    ┆ zfill │
│ ---  ┆ ---   │
│ str  ┆ str   │
╞══════╪═══════╡
│ 0    ┆ 00    │
│ 1    ┆ 01    │
│ 1000 ┆ 1000  │
│ null ┆ null  │
└──────┴───────┘
to_tdexpr(expr: Expr) TdExpr[source]#