Source code for elphick.mass_composition.utils.pd_utils

"""
Pandas utils
"""
import inspect
import logging
from typing import List, Dict, Optional

import pandas as pd
from pandas import DataFrame
from pandas.core.dtypes.common import is_float_dtype

from elphick.mass_composition.utils import solve_mass_moisture
from elphick.mass_composition.utils.size import mean_size


def column_prefixes(columns: List[str]) -> Dict[str, List[str]]:
    """Group column names by the text before their first underscore.

    Only names that contain at least one underscore contribute a prefix.
    Once a prefix exists, every column whose first underscore-delimited
    token equals that prefix is placed in its group - including a column
    with no underscore whose whole name equals the prefix.

    Args:
        columns: The column names to group.

    Returns:
        A dict mapping each prefix to its member columns. Prefixes appear in
        order of first occurrence among the underscore-containing names, and
        members keep the order they have in ``columns``.
    """
    leading_tokens = [name.split('_')[0] for name in columns]

    # First pass: register the prefixes, in order of first appearance.
    groups: Dict[str, List[str]] = {}
    for name, token in zip(columns, leading_tokens):
        if '_' in name:
            groups.setdefault(token, [])

    # Second pass: assign every column whose leading token is a known prefix.
    for name, token in zip(columns, leading_tokens):
        if token in groups:
            groups[token].append(name)

    return groups
def column_prefix_counts(columns: List[str]) -> Dict[str, int]:
    """Count how many columns fall under each prefix.

    Args:
        columns: The column names to analyse.

    Returns:
        A dict mapping each prefix found by ``column_prefixes`` to the number
        of columns grouped under it.
    """
    counts: Dict[str, int] = {}
    for prefix, members in column_prefixes(columns).items():
        counts[prefix] = len(members)
    return counts
def mass_to_composition(df: pd.DataFrame,
                        mass_wet: str = 'mass_wet',
                        mass_dry: str = 'mass_dry') -> pd.DataFrame:
    """Convert component masses to composition.

    Every float column that is not a mass column and is not named ``h2o`` or
    ``moisture`` (case-insensitive) is treated as an additive component mass:
    it is divided by the dry mass and multiplied by 100, i.e. expressed as a
    percentage of dry mass. Non-float columns are dropped from the result.

    Args:
        df: The pd.DataFrame containing wet mass, dry mass and component
            masses. Any H2O / moisture column supplied is ignored.
        mass_wet: The wet mass column, not optional. Consider
            solve_mass_moisture prior to this call if needed.
        mass_dry: The dry mass column, not optional. Consider
            solve_mass_moisture prior to this call if needed.

    Returns:
        A pd.DataFrame holding the wet and dry mass columns, the result of
        ``solve_mass_moisture`` for those masses, and the composition
        columns, concatenated column-wise in that order.
    """
    # Keep this a direct call from the function body: the helper names its
    # logger after the calling frame.
    skipped_cols = _detect_non_float_columns(df)

    excluded = {mass_wet.lower(), mass_dry.lower(), 'h2o', 'moisture'}
    excluded.update(name.lower() for name in skipped_cols)

    masses: pd.DataFrame = df[[mass_wet, mass_dry]]
    components: List[str] = [name for name in df.columns
                             if name.lower() not in excluded]

    # Component mass / dry mass, scaled to percent.
    composition: pd.DataFrame = df[components].div(masses[mass_dry], axis=0).mul(100.0)
    moisture = solve_mass_moisture(mass_wet=masses[mass_wet],
                                   mass_dry=masses[mass_dry])

    return pd.concat([masses, moisture, composition], axis=1)
def composition_to_mass(df: pd.DataFrame,
                        mass_wet: str = 'mass_wet',
                        mass_dry: str = 'mass_dry') -> pd.DataFrame:
    """Convert composition to component masses.

    Every float column that is not a mass column and is not named ``h2o`` or
    ``moisture`` (case-insensitive) is treated as an additive composition in
    percent of dry mass: it is multiplied by the dry mass and divided by 100.
    Non-float columns are dropped from the result.

    Args:
        df: The pd.DataFrame containing mass_wet, mass_dry and composition
            columns. Any H2O / moisture column supplied is dropped and
            replaced by the computed water mass.
        mass_wet: The wet mass column, not optional. Consider
            solve_mass_moisture prior to this call if needed.
        mass_dry: The dry mass column, not optional. Consider
            solve_mass_moisture prior to this call if needed.

    Returns:
        A pd.DataFrame holding the wet and dry mass columns, an ``H2O``
        column equal to wet mass minus dry mass, and the mass of each
        component, concatenated column-wise in that order.
    """
    # Keep this a direct call from the function body: the helper names its
    # logger after the calling frame.
    skipped_cols = _detect_non_float_columns(df)

    excluded = {mass_wet.lower(), mass_dry.lower(), 'h2o', 'moisture'}
    excluded.update(name.lower() for name in skipped_cols)

    masses: pd.DataFrame = df[[mass_wet, mass_dry]]
    components: List[str] = [name for name in df.columns
                             if name.lower() not in excluded]

    # Percent of dry mass -> absolute component mass.
    component_mass: pd.DataFrame = df[components].mul(masses[mass_dry], axis=0).div(100.0)

    water = masses[mass_wet] - masses[mass_dry]
    moisture_mass: pd.Series = pd.Series(water, name='H2O', index=masses.index)

    return pd.concat([masses, moisture_mass, component_mass], axis=1)
def weight_average(df: pd.DataFrame, mass_wet: str = 'mass_wet', mass_dry: str = 'mass_dry') -> DataFrame:
    """Weight average a DataFrame containing mass-composition.

    The records are converted to component masses, summed, and the totals are
    converted back to composition by dividing by the total dry mass.

    Args:
        df: The pd.DataFrame containing mass-composition. H2O if provided
            will be ignored. All columns other than the mass_wet and mass_dry
            are assumed to be `additive`, that is, dry mass weighting is
            valid. Assumes composition is in %w/w units.
        mass_wet: The wet mass column, not optional. Consider
            solve_mass_moisture prior to this call if needed.
        mass_dry: The dry mass column, not optional. Consider
            solve_mass_moisture prior to this call if needed.

    Returns:
        A single-row pd.DataFrame containing the total wet and dry mass, the
        result of ``solve_mass_moisture`` for those totals, and the weight
        averaged composition.
    """
    # Keep this a direct call from the function body: the helper names its
    # logger after the calling frame.
    non_float_cols = _detect_non_float_columns(df)

    # Sum the component masses over all records; .to_frame().T turns the
    # resulting Series of totals back into a one-row frame.
    mass_sum: pd.DataFrame = df.pipe(composition_to_mass, mass_wet=mass_wet, mass_dry=mass_dry).sum(
        axis="index").to_frame().T
    moisture = solve_mass_moisture(mass_wet=mass_sum[mass_wet], mass_dry=mass_sum[mass_dry])

    # Compare lower-cased names on both sides, matching composition_to_mass.
    # Without this, mixed-case mass columns are treated as components and
    # mixed-case non-float columns (already dropped from mass_sum) raise a
    # KeyError below.
    non_component_cols = {mass_wet.lower(), mass_dry.lower(), 'h2o', 'moisture'}
    non_component_cols.update(col.lower() for col in non_float_cols)
    component_cols = [col for col in df.columns if col.lower() not in non_component_cols]

    weighted_composition: pd.DataFrame = mass_sum[component_cols].div(mass_sum[mass_dry], axis=0) * 100
    return pd.concat([mass_sum[[mass_wet, mass_dry]], moisture, weighted_composition], axis=1)
def calculate_recovery(df: pd.DataFrame,
                       df_ref: pd.DataFrame,
                       mass_wet: str = 'mass_wet',
                       mass_dry: str = 'mass_dry') -> pd.DataFrame:
    """Calculate recovery of mass-composition between two DataFrames.

    Both inputs are converted to component masses with
    ``composition_to_mass`` and the masses of ``df`` are divided by those of
    ``df_ref``. The division is a plain DataFrame ``/``, so it aligns on
    index and column labels.

    Args:
        df: The pd.DataFrame containing the mass-composition of interest
            (the numerator). H2O if provided will be ignored. All columns
            other than mass_wet and mass_dry are assumed to be `additive`.
            Assumes composition is in %w/w units.
        df_ref: The pd.DataFrame containing the reference mass-composition
            (the denominator), with the same column conventions as ``df``.
        mass_wet: The wet mass column, not optional. Consider
            solve_mass_moisture prior to this call if needed.
        mass_dry: The dry mass column, not optional. Consider
            solve_mass_moisture prior to this call if needed.

    Returns:
        A pd.DataFrame of mass ratios (``df`` mass over ``df_ref`` mass) for
        the wet mass, dry mass, H2O and each component.
    """
    numerator: pd.DataFrame = composition_to_mass(df, mass_wet=mass_wet, mass_dry=mass_dry)
    denominator: pd.DataFrame = composition_to_mass(df_ref, mass_wet=mass_wet, mass_dry=mass_dry)
    return numerator / denominator
def calculate_partition(df_feed: pd.DataFrame, df_ref: pd.DataFrame,
                        col_mass_dry: str = 'mass_dry') -> pd.DataFrame:
    """Calculate the partition curve from two streams.

    Applicable to the one dimensional case only. The partition number (PN) is
    the dry mass of the reference stream divided by the dry mass of the feed,
    per fraction. Nothing here clips the ratio: PN stays within [0, 1] only
    when the reference mass never exceeds the feed mass.

    The interval mean is named `da`, which can be interpreted as
    `diameter-average` or `density-average`. When the index of ``df_ref`` is
    named ``size`` (case-insensitive) it is computed by ``mean_size``
    (described by the original author as the geometric mean); otherwise it is
    the interval midpoint, i.e. the arithmetic mean.

    TODO: consider a generalised name, fraction-average -> fa?

    Args:
        df_feed: The pd.DataFrame containing mass-composition representing
            the fractionated feed.
        df_ref: The pd.DataFrame containing mass-composition representing
            the fractionated reference stream. Its index must expose ``mid``
            (e.g. a pd.IntervalIndex) unless it is named ``size``.
        col_mass_dry: The dry mass column, not optional.

    Returns:
        A pd.DataFrame containing the partition data: columns ``da`` and
        ``PN``, indexed like the aligned inputs.
    """
    res: pd.DataFrame = df_ref[[col_mass_dry]].div(df_feed[[col_mass_dry]]).rename(columns={col_mass_dry: 'PN'})

    # An unnamed index has name None (and names need not be strings), so
    # guard before calling .lower(); such indexes use the midpoint branch.
    index_name = df_ref.index.name
    is_size_index = isinstance(index_name, str) and index_name.lower() == 'size'

    if is_size_index:
        res.insert(loc=0, column='da', value=mean_size(res.index))
    else:
        res.insert(loc=0, column='da', value=res.index.mid)
    return res
def _detect_non_float_columns(df):
    """List the columns of ``df`` that are not float columns.

    A column counts as float when ``df.select_dtypes(include=[float])``
    selects it. When any non-float columns are found, an info message is
    logged on a logger named after the function that called this helper.

    Args:
        df: The pd.DataFrame to inspect.

    Returns:
        The non-float column labels, in the order they appear in ``df``.
    """
    # Must run in this frame (not in a nested helper or comprehension):
    # stack()[1] is then the direct caller, whose name labels the logger.
    _logger: logging.Logger = logging.getLogger(inspect.stack()[1].function)

    # Select the float columns once instead of once per column.
    float_cols = df.select_dtypes(include=[float]).columns
    non_float_cols: List = [col for col in df.columns if col not in float_cols]

    if non_float_cols:
        _logger.info(f"The following columns are not float columns and will be ignored: {non_float_cols}")
    return non_float_cols