Classifiers

class DoesNotMatch(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
pattern: str,
tags: str | list[str] | None = None,
)

Bases: MatchClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value does not match a regex pattern.

class ExponentialScale(
min_val: Annotated[int, Strict(strict=True)] | Annotated[float, Strict(strict=True)],
max_val: Annotated[int, Strict(strict=True)] | Annotated[float, Strict(strict=True)],
base: Annotated[float, Strict(strict=True), FieldInfo(annotation=NoneType, required=True, metadata=[Gt(gt=0)])] | None = None,
bins: Annotated[int, Strict(strict=True), FieldInfo(annotation=NoneType, required=True, metadata=[Ge(ge=1), Le(le=100)])] = 100,
use_bin_zero: bool = False,
)

Bases: NonIdentityScale

Categories:

dq-classifier

An exponential scale.

property base: Annotated[float, Strict(strict=True), FieldInfo(annotation=NoneType, required=True, metadata=[Gt(gt=0)])]

Returns the base of the exponential scale.

class HasLength(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
min_len: Annotated[int, Strict(strict=True), FieldInfo(annotation=NoneType, required=True, metadata=[Ge(ge=0)])] = 0,
max_len: Annotated[int, Strict(strict=True), FieldInfo(annotation=NoneType, required=True, metadata=[Ge(ge=0)])] = 9223372036854775807,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if the length of a value is within a given range.

property max: int

Returns the maximum allowed length.

property min: int

Returns the minimum allowed length.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the HasLength classifier.

class IdentityScale(
min_val: Annotated[int, Strict(strict=True)],
max_val: Annotated[int, Strict(strict=True)],
use_bin_zero: bool = False,
)

Bases: Scale

Categories:

dq-classifier

An identity scale where each integer value corresponds to a bin.

property bins

Returns the number of bins in the scale.

property scale_range: tuple[Annotated[int, Strict(strict=True)], Annotated[int, Strict(strict=True)]]

Returns the (min, max) range of the scale.

class IsBetween(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
min_val: int | float | bool | str | date | time | datetime | timedelta | bytes | Decimal | None = None,
max_val: int | float | bool | str | date | time | datetime | timedelta | bytes | Decimal | None = None,
closed_on: Literal['none', 'lower', 'upper', 'both'] = 'both',
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
on_wrong_value: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BetweenClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value is inside a specified range.

class IsFalse(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value is False.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsFalse classifier.

class IsIn(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_value: Literal['ignore', 'fail'] = 'ignore',
*,
values: Collection,
tags: str | list[str] | None = None,
)

Bases: InClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value is in a specified set of values.

class IsNan(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value is NaN (Not a Number).

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsNan classifier.

class IsNegative(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a numeric value is negative.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsNegative classifier.

class IsNegativeOrZero(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a numeric value is negative or zero.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsNegativeOrZero classifier.

class IsNotBetween(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
min_val: int | float | bool | str | date | time | datetime | timedelta | bytes | Decimal | None = None,
max_val: int | float | bool | str | date | time | datetime | timedelta | bytes | Decimal | None = None,
closed_on: Literal['none', 'lower', 'upper', 'both'] = 'both',
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
on_wrong_value: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BetweenClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value is outside a specified range.

class IsNotIn(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_value: Literal['ignore', 'fail'] = 'ignore',
*,
values: Collection,
tags: str | list[str] | None = None,
)

Bases: InClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value is not in a specified set of values.

class IsNotNan(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value is not NaN.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsNotNan classifier.

class IsNotNull(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value is not NULL.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsNotNull classifier.

class IsNotNullNorNan(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value is neither NULL nor NaN.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsNotNullNorNan classifier.

class IsNotZero(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a numeric value is not zero.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsNotZero classifier.

class IsNull(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value is NULL.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsNull classifier.

class IsNullOrNan(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value is either NULL or NaN.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsNullOrNan classifier.

class IsPositive(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a numeric value is positive.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsPositive classifier.

class IsPositiveOrZero(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a numeric value is positive or zero.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsPositiveOrZero classifier.

class IsTrue(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value is True.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsTrue classifier.

class IsZero(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
tags: str | list[str] | None = None,
)

Bases: BoolClassifier

Categories:

dq-classifier

A boolean classifier that checks if a numeric value is zero.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by the IsZero classifier.

class LinearScale(
min_val: Annotated[int, Strict(strict=True)] | Annotated[float, Strict(strict=True)],
max_val: Annotated[int, Strict(strict=True)] | Annotated[float, Strict(strict=True)],
bins: Annotated[int, Strict(strict=True), FieldInfo(annotation=NoneType, required=True, metadata=[Ge(ge=1), Le(le=100)])] = 100,
use_bin_zero: bool = False,
)

Bases: NonIdentityScale

Categories:

dq-classifier

A linear scale.

class LogarithmicScale(
min_val: Annotated[int, Strict(strict=True)] | Annotated[float, Strict(strict=True)],
max_val: Annotated[int, Strict(strict=True)] | Annotated[float, Strict(strict=True)],
base: Annotated[float, Strict(strict=True), FieldInfo(annotation=NoneType, required=True, metadata=[Gt(gt=0)])] | None = None,
bins: Annotated[int, Strict(strict=True), FieldInfo(annotation=NoneType, required=True, metadata=[Ge(ge=1), Le(le=100)])] = 100,
use_bin_zero: bool = False,
)

Bases: NonIdentityScale

Categories:

dq-classifier

A logarithmic scale.

property base: Annotated[float, Strict(strict=True), FieldInfo(annotation=NoneType, required=True, metadata=[Gt(gt=0)])]

Returns the base of the logarithmic scale.

class Matches(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
*,
pattern: str,
tags: str | list[str] | None = None,
)

Bases: MatchClassifier

Categories:

dq-classifier

A boolean classifier that checks if a value matches a regex pattern.

class MonomialScale(
min_val: Annotated[int, Strict(strict=True)] | Annotated[float, Strict(strict=True)],
max_val: Annotated[int, Strict(strict=True)] | Annotated[float, Strict(strict=True)],
power: Annotated[float, Strict(strict=True), FieldInfo(annotation=NoneType, required=True, metadata=[Gt(gt=0)])],
bins: Annotated[int, Strict(strict=True), FieldInfo(annotation=NoneType, required=True, metadata=[Ge(ge=1), Le(le=100)])] = 100,
use_bin_zero: bool = False,
)

Bases: NonIdentityScale

Categories:

dq-classifier

A monomial (power-law) scale.

property power: Annotated[float, Strict(strict=True), FieldInfo(annotation=NoneType, required=True, metadata=[Gt(gt=0)])]

Returns the power of the monomial scale.

class ScaleCategorizer(
column_names: str | tuple[str, str | None] | Annotated[list[str], Strict] | Annotated[list[tuple[str, str | None]], Strict] | None = None,
on_missing_column: Literal['ignore', 'fail'] = 'ignore',
on_wrong_type: Literal['ignore', 'fail'] = 'ignore',
on_wrong_scale_value: Literal['ignore', 'fail'] = 'ignore',
*,
scale: Scale,
tags: str | list[str] | None = None,
)

Bases: Categorizer

Categories:

dq-classifier

A classifier that bucketizes data into bins based on a given scale.

property scale: Scale

Returns the scale used for categorization.

classmethod supported_dtypes() -> FrozenSet[Type]

Returns the set of data types supported by categorizers.