Resampling 2D Interval Data

The Sink Float metallurgical test splits (fractionates) samples by density. The density fractionation is often conducted on each size fraction, resulting in 2D fractionation (interval) data.

This example demonstrates how to resample 2D interval data using the IntervalSample object.

import logging

# noinspection PyUnresolvedReferences
import numpy as np
import pandas as pd
import plotly.io

from elphick.geomet import IntervalSample
from elphick.geomet.datasets import datasets
from elphick.geomet.utils.pandas import MeanIntervalIndex
from elphick.geomet.utils.size import sizes_all

# Configure logging for the example: INFO level and above, with each record
# showing an ISO-8601 style timestamp, the level, and the originating
# module / function.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(module)s - %(funcName)s: %(message)s',
                    datefmt='%Y-%m-%dT%H:%M:%S%z')

Load Data

We load some real data.

# Load the Nordic iron ore sink/float example dataset from the
# elphick.geomet datasets module.
df_data: pd.DataFrame = datasets.load_nordic_iron_ore_sink_float()
# Bare expression: displays the frame in the rendered gallery output.
df_data

	size_retained	size_passing	density_lo	density_hi	mass_pct	Fe	SiO2	P	TiO2	V
0	1.000	NaN	NaN	NaN	0.4	22.7	50.9	0.042	0.170	0.0049
1	0.100	1.000	NaN	NaN	67.1	27.3	47.7	0.107	0.178	0.0062
2	0.063	0.100	NaN	NaN	12.7	18.0	57.8	0.440	0.200	0.0056
3	0.040	0.063	NaN	NaN	8.2	16.9	57.1	0.610	0.235	0.0057
4	0.000	0.040	NaN	NaN	11.6	19.4	51.6	0.650	0.310	0.0072
5	0.100	NaN	NaN	2.7	54.2	1.2	1.6	0.180	0.084	0.0030
6	0.100	NaN	2.7	3.3	9.7	16.6	42.6	0.980	0.380	0.0100
7	0.100	NaN	3.3	NaN	36.1	68.0	78.8	0.033	0.285	0.0120
8	0.063	0.100	NaN	2.7	24.0	1.2	79.5	0.015	0.060	0.0070
9	0.063	0.100	2.7	3.3	11.9	10.2	54.0	2.320	0.280	0.0080
10	0.063	0.100	3.3	NaN	64.1	67.1	1.4	0.174	0.530	0.0020
11	0.040	0.063	NaN	2.7	76.6	3.1	71.3	0.850	0.145	0.0130
12	0.040	0.063	2.7	3.3	4.3	28.5	24.5	2.780	0.460	0.1100
13	0.040	0.063	3.3	NaN	19.1	68.6	0.6	0.069	0.480	0.0130

The dataset contains size x assay, plus size x density x assay data. We’ll drop the size x assay data to leave the sink / float data.

# Keep only the records that carry at least one density bound; rows where both
# density_lo and density_hi are missing are the size x assay records.
has_density_bound = df_data[['density_lo', 'density_hi']].notna().any(axis=1)
df_sink_float: pd.DataFrame = df_data.loc[has_density_bound].copy()
df_sink_float

	size_retained	size_passing	density_lo	density_hi	mass_pct	Fe	SiO2	P	TiO2	V
5	0.100	NaN	NaN	2.7	54.2	1.2	1.6	0.180	0.084	0.003
6	0.100	NaN	2.7	3.3	9.7	16.6	42.6	0.980	0.380	0.010
7	0.100	NaN	3.3	NaN	36.1	68.0	78.8	0.033	0.285	0.012
8	0.063	0.100	NaN	2.7	24.0	1.2	79.5	0.015	0.060	0.007
9	0.063	0.100	2.7	3.3	11.9	10.2	54.0	2.320	0.280	0.008
10	0.063	0.100	3.3	NaN	64.1	67.1	1.4	0.174	0.530	0.002
11	0.040	0.063	NaN	2.7	76.6	3.1	71.3	0.850	0.145	0.013
12	0.040	0.063	2.7	3.3	4.3	28.5	24.5	2.780	0.460	0.110
13	0.040	0.063	3.3	NaN	19.1	68.6	0.6	0.069	0.480	0.013

We will fill the missing (NaN) interval bounds with assumed values: a top size of 1.0 and density limits of 1.5 and 5.0.

# Replace the open-ended interval bounds with assumed limits.
# A single DataFrame-level fillna is used on purpose: calling
# ``df[col].fillna(..., inplace=True)`` operates on a selected column (chained
# assignment), which emits a FutureWarning in pandas 2.x and does not modify
# the parent frame when Copy-on-Write is enabled.
assumed_bounds = {'size_passing': 1.0, 'density_lo': 1.5, 'density_hi': 5.0}
df_sink_float = df_sink_float.fillna(value=assumed_bounds)

Check the mass_pct by size

# Total the mass_pct within each size fraction (one group per passing/retained pair).
mass_check: pd.DataFrame = df_sink_float[['size_passing', 'size_retained', 'mass_pct']].groupby(
    ['size_passing', 'size_retained']).sum()
# Every size fraction should total 100.  Compare with a tolerance rather than
# exact equality, because summing decimal percentages in floating point is not
# guaranteed to land on exactly 100.0.
assert np.allclose(mass_check['mass_pct'], 100.0)

mass_check

		mass_pct
size_passing	size_retained
0.063	0.040	100.0
0.100	0.063	100.0
1.000	0.100	100.0

This indicates that the mass_pct column is actually a density_mass_pct column, i.e. the mass split by density within each size fraction. We’ll rename it, but we also need to get the size_mass_pct values for those sizes from the size dataset.

# mass_pct sums to 100 within each size fraction, so it is the density split
# of that fraction; name it accordingly.
df_sink_float.rename(columns={'mass_pct': 'density_mass_pct'}, inplace=True)

# The size-only records are the rows that have neither density bound.
size_only_rows = df_data[['density_lo', 'density_hi']].isna().all(axis=1)
df_size: pd.DataFrame = df_data.loc[size_only_rows, :].copy()
# Drop the columns that are entirely empty for the size-only records.
df_size.dropna(how='all', axis=1, inplace=True)
# Tolerance-based check: an exact float comparison against 100 is fragile.
assert np.isclose(df_size['mass_pct'].sum(), 100.0)

# Unique (retained, passing) size pairs present in the sink/float records.
size_pairs = {(round(r, 5), round(p, 5)) for r, p in
              zip(df_sink_float['size_retained'].values, df_sink_float['size_passing'].values)}
for r, p in size_pairs:
    # Copy the size fraction's mass_pct onto every density record of that fraction.
    sink_float_rows = (df_sink_float['size_retained'] == r) & (df_sink_float['size_passing'] == p)
    size_rows = (df_size['size_retained'] == r) & (df_size['size_passing'] == p)
    df_sink_float.loc[sink_float_rows, 'size_mass_pct'] = df_size.loc[size_rows, 'mass_pct'].values[0]
# relocate the size_mass_pct column to the correct position, after size_passing
df_sink_float.insert(2, 'size_mass_pct', df_sink_float.pop('size_mass_pct'))
# add the mass_pct column: the density split scaled by the size fraction's mass
df_sink_float.insert(loc=6, column='mass_pct',
                     value=df_sink_float['density_mass_pct'] * df_sink_float['size_mass_pct'] / 100)
df_sink_float

	size_retained	size_passing	size_mass_pct	density_lo	density_hi	density_mass_pct	mass_pct	Fe	SiO2	P	TiO2	V
5	0.100	1.000	67.1	1.5	2.7	54.2	36.3682	1.2	1.6	0.180	0.084	0.003
6	0.100	1.000	67.1	2.7	3.3	9.7	6.5087	16.6	42.6	0.980	0.380	0.010
7	0.100	1.000	67.1	3.3	5.0	36.1	24.2231	68.0	78.8	0.033	0.285	0.012
8	0.063	0.100	12.7	1.5	2.7	24.0	3.0480	1.2	79.5	0.015	0.060	0.007
9	0.063	0.100	12.7	2.7	3.3	11.9	1.5113	10.2	54.0	2.320	0.280	0.008
10	0.063	0.100	12.7	3.3	5.0	64.1	8.1407	67.1	1.4	0.174	0.530	0.002
11	0.040	0.063	8.2	1.5	2.7	76.6	6.2812	3.1	71.3	0.850	0.145	0.013
12	0.040	0.063	8.2	2.7	3.3	4.3	0.3526	28.5	24.5	2.780	0.460	0.110
13	0.040	0.063	8.2	3.3	5.0	19.1	1.5662	68.6	0.6	0.069	0.480	0.013

Create MeanIntervalIndexes

# Left-closed intervals, [lower, upper), built from the bound columns of each dimension.
size_intervals = pd.arrays.IntervalArray.from_arrays(
    left=df_sink_float['size_retained'],
    right=df_sink_float['size_passing'],
    closed='left',
)
density_intervals = pd.arrays.IntervalArray.from_arrays(
    left=df_sink_float['density_lo'],
    right=df_sink_float['density_hi'],
    closed='left',
)

# Wrap each interval array in a named MeanIntervalIndex.
size_index = MeanIntervalIndex(size_intervals)
size_index.name = 'size'
density_index = MeanIntervalIndex(density_intervals)
density_index.name = 'density'

# Index the records by (size, density); the bound columns are then redundant.
df_sink_float.index = pd.MultiIndex.from_arrays([size_index, density_index])
bound_columns = ['size_retained', 'size_passing', 'density_lo', 'density_hi']
df_sink_float.drop(columns=bound_columns, inplace=True)
df_sink_float

		size_mass_pct	density_mass_pct	mass_pct	Fe	SiO2	P	TiO2	V
size	density
[0.1, 1.0)	[1.5, 2.7)	67.1	54.2	36.3682	1.2	1.6	0.180	0.084	0.003
	[2.7, 3.3)	67.1	9.7	6.5087	16.6	42.6	0.980	0.380	0.010
	[3.3, 5.0)	67.1	36.1	24.2231	68.0	78.8	0.033	0.285	0.012
[0.063, 0.1)	[1.5, 2.7)	12.7	24.0	3.0480	1.2	79.5	0.015	0.060	0.007
	[2.7, 3.3)	12.7	11.9	1.5113	10.2	54.0	2.320	0.280	0.008
	[3.3, 5.0)	12.7	64.1	8.1407	67.1	1.4	0.174	0.530	0.002
[0.04, 0.063)	[1.5, 2.7)	8.2	76.6	6.2812	3.1	71.3	0.850	0.145	0.013
	[2.7, 3.3)	8.2	4.3	0.3526	28.5	24.5	2.780	0.460	0.110
	[3.3, 5.0)	8.2	19.1	1.5662	68.6	0.6	0.069	0.480	0.013

Create a 2D IntervalSample

# Build the sample from the (size, density) indexed frame, using mass_pct as
# the dry mass variable and leaving moisture out of scope.
interval_sample = IntervalSample(df_sink_float, name='SINK_FLOAT', moisture_in_scope=False, mass_dry_var='mass_pct')
# NOTE(review): both checks print False in the rendered output although the
# records look like a regular 3 x 3 size-by-density grid - confirm this is the
# intended result.
print(interval_sample.is_2d_grid())
print(interval_sample.is_rectilinear_grid)

# Heatmap of mass_pct for the sample as loaded.
fig = interval_sample.plot_heatmap(components=['mass_pct'])
plotly.io.show(fig)

False
False

Upsample

We will upsample the data to a new grid

# Size edges: every entry of sizes_all lying within the sample's size range.
size_lower, size_upper = size_index.left.min(), size_index.right.max()
size_grid = sorted(s for s in sizes_all if size_lower <= s <= size_upper)
# Density edges from 1.5 in steps of 0.1.
# NOTE(review): a float-step arange carries rounding error in its values;
# presumably precision=3 below absorbs this - confirm.
density_grid = np.arange(1.5, 5.1, 0.1)
new_grids: dict = {'size': size_grid, 'density': density_grid}

upsampled: IntervalSample = interval_sample.resample_2d(interval_edges=new_grids, precision=3)

# The aggregate of the resampled data must match that of the original sample.
pd.testing.assert_frame_equal(
    interval_sample.aggregate.reset_index(drop=True),
    upsampled.aggregate.reset_index(drop=True),
)

fig = upsampled.plot_heatmap(components=['mass_pct'])
plotly.io.show(fig)

(38, 36, 6)

Total running time of the script: (0 minutes 0.781 seconds)

Gallery generated by Sphinx-Gallery