Merging Schemas

This example demonstrates how to merge multiple schemas into a single schema.

import inspect
from pathlib import Path

import yaml

from elphick.pandera_utils.pandera_utils import load_schema_from_yaml, merge_schemas
from elphick.pandera_utils.utils.pandera_io_pandas_io import to_yaml

# Derive this script's location from the current frame and rebind __file__ to the
# resolved Path, so the asset paths below can be built relative to it.
# NOTE(review): presumably needed because __file__ is not reliably set when the
# example is executed by the gallery builder — confirm.
__file__ = Path(inspect.getfile(inspect.currentframe())).resolve()


def print_schemafile(filepath: Path):
    """Print the contents of a YAML schema file in a normalised layout.

    The file is parsed with ``yaml.safe_load`` and re-serialised with
    ``yaml.dump`` using a two-space indent and the original key order
    (``sort_keys=False``), then written to stdout.

    Args:
        filepath: Location of the YAML file to display.
    """
    with open(filepath, "r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)
    # The file handle is no longer needed once the document has been parsed.
    rendered = yaml.dump(parsed, sort_keys=False, indent=2)
    print(rendered)

Load Schemas

Locate the three YAML schema files and print the contents of each.

# The example schema files live in an "assets" folder located one level above
# the directory containing this script (``__file__.parents[1]``).
schema_filepath_1: Path = __file__.parents[1] / "assets/aliases_schema.yaml"
schema_filepath_2: Path = __file__.parents[1] / "assets/category_schema.yaml"
schema_filepath_3: Path = __file__.parents[1] / "assets/missing_sentinels_schema.yaml"
# Show the first schema file (aliases).
print_schemafile(schema_filepath_1)
columns:
  my_pet_column:
    dtype: object
    metadata:
      pandera_utils:
        aliases:
        - pets
        - my_pets
# Show the second schema file (category).
print_schemafile(schema_filepath_2)
columns:
  my_color_column:
    dtype: category
    checks:
      isin:
      - R
      - G
      - B
    metadata:
      pandera_utils:
        category:
          add_all_categories: true
          ordered: false
          label:
            map:
              R: Red
              G: Green
              B: Blue
            dtype: category
          description:
            map:
              R: The color red
              G: The color green
              B: The color blue
            dtype: category
          wavelength:
            map:
              R: 700
              G: 546
              B: 435
            dtype: int
# Show the third schema file (missing sentinels).
print_schemafile(schema_filepath_3)
columns:
  my_nullable_int_column:
    dtype: Int8
    nullable: true
    coerce: true
    metadata:
      pandera_utils:
        missing_sentinels:
        - -1
        - -9

Merge Schemas

Merge the schemas into a single schema

# Load each YAML file into its own schema object.
schema_1 = load_schema_from_yaml(schema_filepath_1)
schema_2 = load_schema_from_yaml(schema_filepath_2)
schema_3 = load_schema_from_yaml(schema_filepath_3)
# Combine the three schemas into one; the printed result further down this page
# contains the columns from all three inputs.
merged_schema = merge_schemas([schema_1, schema_2, schema_3])

Print the merged schema in a nicely formatted way, using a temp file (formatting workaround only).

# Serialise the merged schema with to_yaml and print the resulting YAML text.
print(to_yaml(merged_schema))
schema_type: dataframe
version: 0.0.0+dev0
columns:
  my_pet_column:
    title: null
    description: null
    dtype: object
    nullable: false
    checks: null
    unique: false
    coerce: false
    required: true
    regex: false
    metadata:
      pandera_utils:
        aliases:
        - pets
        - my_pets
  my_color_column:
    title: null
    description: null
    dtype: category
    nullable: false
    checks:
      isin:
        value:
        - R
        - G
        - B
        options:
          raise_warning: false
          ignore_na: true
    unique: false
    coerce: false
    required: true
    regex: false
    metadata:
      pandera_utils:
        category:
          add_all_categories: true
          ordered: false
          label:
            map:
              R: Red
              G: Green
              B: Blue
            dtype: category
          description:
            map:
              R: The color red
              G: The color green
              B: The color blue
            dtype: category
          wavelength:
            map:
              R: 700
              G: 546
              B: 435
            dtype: int
  my_nullable_int_column:
    title: null
    description: null
    dtype: Int8
    nullable: true
    checks: null
    unique: false
    coerce: true
    required: true
    regex: false
    metadata:
      pandera_utils:
        missing_sentinels:
        - -1
        - -9
checks: null
index: null
dtype: null
coerce: false
strict: false
name: null
ordered: false
unique: null
report_duplicates: all
unique_column_names: false
add_missing_columns: false
title: null
description: null

Total running time of the script: (0 minutes 0.016 seconds)

Gallery generated by Sphinx-Gallery