import numpy as np
import pandas as pd
from math import pi, floor
import hsluv
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize'] = [14, 7]
Measured data are layed out in one file per density, and each file contains a lines with two values, namely the number of pixels of the grain of interest that are visible, and the number of background pixels that are visible (ie pixels where the closest grain is further than Z_max). A file repeats sample_count times the same measures, so that it gives data point in the following order:
sample=1, Z=0
sample=1, Z=0.01 Z_max
...
sample=1, Z=Z_max
sample=2, Z=0
sample=2, Z=0.01 Z_max
...
sample=2, Z=Z_max
...
# PARAMETERS
# template of experiment's result file, %x replaced by density
file_tpl = "C:/Elie/Data/SandViewer/Measures/Shadowing/d%.3f.txt"
# densities for which the experiments have been run, as an inversed proportion of the max stack density
densities = np.arange(1.0, 2.075, 0.025)
# depths for which the experiments have been run
zs = np.linspace(0.00, 0.1, 100)
# radius of a grain in the experiments (use to show plots in percentage of radius)
radius = 0.00574232
# infer sample count from the content of the first file
density_frames = densities.shape[0]
z_frames = zs.shape[0]
df = pd.read_csv(file_tpl % densities[0], sep=';')
sample_count = floor(df.values.shape[0] / z_frames)
# Load data into an array parametrized by (density, depth, sample)
data = np.zeros((density_frames, z_frames, sample_count, 2))
for i, x in enumerate(densities):
df = pd.read_csv(file_tpl % x, sep=';')
data[i,:,:,:] = df.values[:z_frames * sample_count,1:].reshape(sample_count, z_frames, 2).transpose(1, 0, 2)
max_black = 2710 # maximum number of black pixels (pixels of the grain of interest) given its size
for i in np.unique(np.argwhere(data[:,:,:,0] > max_black)[:,0]):
print("Error with curve #{} (density={}): improbable number of black pixels".format(i, densities[i]))
continue
def color(t):
return hsluv.hsluv_to_rgb([
340 * (1 - t) + 250 * t,
100 * (1 - pow(t,.5)) + 60 * pow(t,.5),
50 * (1 - t) + 50 * t,
])
fig, ax = plt.subplots()
ax.set_xlabel('depth z')
ax.set_ylabel('Fraction of visibility')
for i, d in enumerate(densities[:-2]):
if i % 2 != 0: continue
curve_data = data[i,:,:,0].mean(axis=1) / max_black
ax.plot(zs / radius, curve_data, label="d=%.03f"%(1./d), color=color(i/(len(densities)-1)))
ax.plot([zs[-1]*1.15 / radius], [0])
ax.legend(loc='best')
ax.set_title("Mean visibility of a grain at depth z in a sand tract\nwhose density is d times the maximum stacking density without grain overlap.\nAveraged over 1000 samples, with grains of radius 1.")
def plot_stddev(i):
fig, ax = plt.subplots()
ax.set_xlabel('depth z')
ax.set_ylabel('Fraction of visibility')
mean_curve = data[i,:,:,0].mean(axis=1) / max_black
ax.plot(zs / radius, mean_curve, label="mean", color=color(0))
stddev_data = data[i,:,:,0].std(axis=1) / max_black
ax.plot(zs / radius, np.minimum(1, mean_curve + stddev_data), label="mean+stddev", color=color(1))
ax.plot(zs / radius, np.maximum(0, mean_curve - stddev_data), label="mean-stddev", color=color(1))
ax.plot([zs[-1]*1.15 / radius], [0])
ax.legend(loc='best')
ax.set_title("Mean visibility of a grain and its standard deviation for d=%.03f" % (1./densities[i]))
plot_stddev(0)
plot_stddev(len(densities) // 2)
plot_stddev(len(densities) - 1)
R = 0.1
z = zs / radius * R
max_density = 0.74 / (4./3. * pi * (R*R*R))
ds = max_density / densities[:-2:2]
First issue The result should be invariant in the ratio R/z. It is indeed, unless $1-\pi R^2$ becomes negative.
Second issue This $1-\pi R^2$ is not homogeneous.
fig, ax = plt.subplots()
ax.set_xlabel('depth z')
ax.set_ylabel('Fraction of visibility')
for i, d in enumerate(ds):
pva = np.minimum(np.power(np.maximum(0.0, 1.0 - pi * R * R), d * z), 1.0)
ax.plot(z / R, pva, label="d=%.03f"%(d/max_density), color=color(i/(len(ds)-1)))
ax.plot([zs[-1]*1.15 / radius], [0])
ax.legend()
ax.set_title("Theoretical mean visibility of a grain at depth z in a sand tract of density is d*max")
Third issue The results don't match.
fig, ax = plt.subplots()
ax.set_xlabel('depth z')
ax.set_ylabel('Fraction of visibility')
for i, d in enumerate(ds):
pva = np.exp(- pi * R * R * d * z)
ax.plot(z / R, pva, label="d=%.03f"%(d/max_density), color=color(i/(len(ds)-1)))
ax.plot([zs[-1]*1.15 / radius], [0])
ax.legend()
ax.set_title("Theoretical mean visibility of a grain at depth z in a sand tract of density is d*max")
x = zs / radius
degrees = np.arange(1, 10, 1, dtype=np.int32)
res = np.zeros((len(degrees), len(densities)))
for j, deg in enumerate(degrees):
for i, d in enumerate(densities):
y = data[i,:,:,0].mean(axis=1) / max_black
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, y, deg, full=True)
res[j, i] = residuals[0]
fig, ax = plt.subplots()
ax.set_xlabel('density')
ax.set_ylabel('residuals')
for j, deg in enumerate(degrees):
pva = np.exp(- pi * R * R * d * z)
ax.plot(1./densities, res[j], label="degree=%d" % deg)
ax.legend()
ax.set_title("Polynomial fitting residuals")
Clearly it is not fitting.
# sanity check about the use of np.polyfit
x = zs / radius
p = [1.0, 0.2, -1.6, -1.3, -1.2]
curve_data = eval_polynomial(x, p)
(p2, residuals, rank, singular_values, rcond) = np.polyfit(x, curve_data, 5, full=True)
fig, ax = plt.subplots()
ax.plot(x, curve_data)
ax.plot(x, eval_polynomial(x, p2))
residuals
Ok.
Model is $\exp(P(z))$ where $P$ is a polynomial function.
x = zs / radius
degrees = np.arange(1, 10, 1, dtype=np.int32)
res = np.zeros((len(degrees), len(densities)))
for j, deg in enumerate(degrees):
for i, d in enumerate(densities):
y = np.log(data[i,:,:,0].mean(axis=1) / max_black)
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, y, deg, full=True)
res[j, i] = residuals[0]
fig, ax = plt.subplots()
ax.set_xlabel('density')
ax.set_ylabel('residuals')
for j, deg in enumerate(degrees):
pva = np.exp(- pi * R * R * d * z)
ax.plot(1./densities, res[j], label="degree=%d" % deg)
ax.legend()
ax.set_title("Exponential polynomial fitting residuals")
Let's drop degree 1.
x = zs / radius
degrees = np.arange(2, 10, 1, dtype=np.int32)
res = np.zeros((len(degrees), len(densities)))
for j, deg in enumerate(degrees):
for i, d in enumerate(densities):
y = np.log(data[i,:,:,0].mean(axis=1) / max_black)
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, y, deg, full=True)
res[j, i] = residuals[0]
fig, ax = plt.subplots()
ax.set_xlabel('density')
ax.set_ylabel('residuals')
for j, deg in enumerate(degrees):
pva = np.exp(- pi * R * R * d * z)
ax.plot(1./densities, res[j], label="degree=%d" % deg)
ax.legend()
ax.set_title("Exponential polynomial fitting residuals")
It's better for the tail, ie for lower densities, but not right for the beginning. So we'll focus now on higher densities. But actually the error is itself exponential so it is not perfect for lower densities neither.
x = zs / radius
first=15
focused_densities = densities[first:]
degrees = np.arange(2, 10, 2, dtype=np.int32)
res = np.zeros((len(degrees), len(focused_densities)))
for j, deg in enumerate(degrees):
for i, d in enumerate(focused_densities):
y = np.log(data[first+i,:,:,0].mean(axis=1) / max_black)
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, y, deg, full=True)
res[j, i] = residuals[0]
fig, ax = plt.subplots()
ax.set_xlabel('density')
ax.set_ylabel('residuals')
for j, deg in enumerate(degrees):
pva = np.exp(- pi * R * R * d * z)
ax.plot(1./focused_densities, res[j], label="degree=%d" % deg)
ax.legend()
ax.set_title("Exponential polynomial fitting residuals (focus on lower densities)")
def eval_polynomial(x, p):
e = 0
for k in p:
e = e * x + k
return e
def color2(t):
return hsluv.hsluv_to_rgb([
50 * (1 - t) + 150 * t,
100 * (1 - pow(t,.5)) + 60 * pow(t,.5),
50 * (1 - t) + 50 * t,
])
x = zs / radius
first=15
focused_densities = densities[first:]
fig, ax = plt.subplots()
ax.set_xlabel('depth z')
ax.set_ylabel('Fraction of visibility')
for i, d in enumerate(focused_densities):
if i % 2 != 0: continue
curve_data = data[first+i,:,:,0].mean(axis=1) / max_black
ax.plot(x, curve_data, label="d=%.03f"%(1./d), color=color(i/(len(densities)-1)))
# exp polynomial model
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, np.log(curve_data), 5, full=True)
curve_fit = np.exp(eval_polynomial(x, p))
ax.plot(x, curve_fit, label="d=%.03f fitted"%(1./d), color=color2(i/(len(densities)-1)))
ax.plot([zs[-1]*1.15 / radius], [0])
ax.legend()
The error is mainly concentrated on the lower z values.
x = zs[:20] / radius
first=15
focused_densities = densities[first:]
fig, ax = plt.subplots()
ax.set_xlabel('depth z')
ax.set_ylabel('Fraction of visibility')
for i, d in enumerate(focused_densities):
if i % 2 != 0: continue
curve_data = data[first+i,:20,:,0].mean(axis=1) / max_black
ax.plot(x, curve_data, label="d=%.03f"%(1./d), color=color(i/(len(densities)-1)))
# exp polynomial model
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, np.log(curve_data), 5, full=True)
curve_fit = np.exp(eval_polynomial(x, p))
ax.plot(x, curve_fit, label="d=%.03f fitted"%(1./d), color=color2(i/(len(densities)-1)))
ax.plot([zs[20]*1.15 / radius], [0.4])
ax.legend()
It's ok enough.
z_count = 20
x = zs[:z_count] / radius
focus_densities = densities[:-35]
fig, ax = plt.subplots()
ax.set_xlabel('depth z')
ax.set_ylabel('Fraction of visibility')
for i, d in enumerate(focus_densities):
if i % 2 != 0: continue
curve_data = data[i,:z_count,:,0].mean(axis=1) / max_black
ax.plot(x, curve_data, label="d=%.03f"%(1./d), color=color(i/(len(focus_densities)-1)))
# exp polynomial model
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, np.log(curve_data), 5, full=True)
curve_fit = np.exp(eval_polynomial(x, p))
ax.plot(x, curve_fit, label="d=%.03f fitted"%(1./d), color=color2(i/(len(focus_densities)-1)))
ax.plot([x[-1]*1.0], [1])
ax.legend()
z_count = 20
x = zs / radius
focus_densities = densities[:-35]
fig, ax = plt.subplots()
ax.set_xlabel('depth z')
ax.set_ylabel('Fraction of visibility')
# Reference
curve_data = data[0,:,:,0].mean(axis=1) / max_black
ax.plot(x[:z_count], curve_data[:z_count], label="reference", color=color(0))
# exp polynomial models
for i, deg in enumerate([1, 3, 5]):
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, np.log(curve_data), deg, full=True)
curve_fit = np.exp(eval_polynomial(x, p))
ax.plot(x[:z_count], curve_fit[:z_count], label="exp(P_%d)"%deg, color=color2(i/2.))
# polynomial models
for i, deg in enumerate([1, 3, 5]):
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, curve_data, deg, full=True)
curve_fit = eval_polynomial(x, p)
ax.plot(x[:z_count], curve_fit[:z_count], label="P_%d"%deg, color=color(1-i/3.))
ax.legend()
ax.set_title("Different fitting models for d=1.0")
z_count = 40
x = zs / radius
focus_densities = densities[:-35]
fig, ax = plt.subplots()
ax.set_xlabel('depth z')
ax.set_ylabel('Fraction of visibility')
# Reference
curve_data = data[0,:,:,0].mean(axis=1) / max_black
ax.plot(x[:z_count], curve_data[:z_count], label="reference", color=color(0))
# exp polynomial models
for i, deg in enumerate([4, 5, 6]):
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, np.log(curve_data), deg, full=True)
curve_fit = np.exp(eval_polynomial(x, p))
ax.plot(x[:z_count], curve_fit[:z_count], label="exp(P_%d)"%deg, color=color2(i/2.))
# polynomial models
for i, deg in enumerate([2, 3, 5]):
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, curve_data, deg, full=True)
curve_fit = eval_polynomial(x, p)
ax.plot(x[:z_count], curve_fit[:z_count], label="P_%d"%deg, color=color(1-i/3.))
ax.legend()
ax.set_title("Different fitting models for d=1.0")
z_count = 60
x = zs / radius
focus_densities = densities[:-35]
fig, ax = plt.subplots()
ax.set_xlabel('depth z')
ax.set_ylabel('Fraction of visibility')
# Reference
curve_data = data[0,:,:,0].mean(axis=1) / max_black
ax.plot(x[:z_count], curve_data[:z_count], label="reference", color=color(0))
# exp polynomial models
for i, deg in enumerate([5, 6]):
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, np.log(curve_data), deg, full=True)
curve_fit = np.exp(eval_polynomial(x, p))
ax.plot(x[:z_count], curve_fit[:z_count], label="exp(P_%d)"%deg, color=color2(i/2.))
# polynomial models
for i, deg in enumerate([3, 5]):
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, curve_data, deg, full=True)
curve_fit = eval_polynomial(x, p)
ax.plot(x[:z_count], curve_fit[:z_count], label="P_%d"%deg, color=color(1-i/3.))
ax.legend()
ax.set_title("Different fitting models for d=1.0")
z_count = 20
x = zs / radius
focus_densities = densities[:-35]
fig, ax = plt.subplots()
ax.set_xlabel('depth z')
ax.set_ylabel('Fraction of visibility')
# Reference
curve_data = data[0,:,:,0].mean(axis=1) / max_black
ax.plot(x[:z_count], curve_data[:z_count], label="reference", color=color(0))
# exp polynomial models
for i, deg in enumerate([5, 6, 7, 8]):
(p, residuals, rank, singular_values, rcond) = np.polyfit(x, np.log(curve_data), deg, full=True)
curve_fit = np.exp(eval_polynomial(x, p))
ax.plot(x[:z_count], curve_fit[:z_count], label="exp(P_%d)"%deg, color=color2(i/2.))
ax.legend()
ax.set_title("Different fitting models for d=1.0")
The beginning of the curve seems linear. We'll see up to which point it is "linear enough", as seen by the regression residuals.
# Used in all this section
x = zs / radius
ds = 1./densities
curves = curve_data = data[:,:,:,0].mean(axis=2) / max_black
res = np.zeros(x.shape[0] - 2)
for i in range(res.shape[0]):
# fit curve between 0th and ith z value
(p, residuals, _, __, ___) = np.polyfit(x[:2+i], curves[0,:2+i], 1, full=True)
res[i] = residuals[0] if residuals.shape[0] > 0 else 0
fig, ax = plt.subplots()
ax.plot(np.arange(res.shape[0]) + 2, res)
ax.set_xlabel("End index of the fitted sub-curve")
ax.set_ylabel("Fitting residual")
ax.set_title("Residuals of linear fitting for sub-curve with density = 1.0")
For several densities now.
nd = densities.shape[0]
res = np.zeros((nd, x.shape[0] - 2))
for j in range(nd):
for i in range(res.shape[1]):
# fit curve between 0th and ith z value
(p, residuals, _, __, ___) = np.polyfit(x[:2+i], curves[j,:2+i], 1, full=True)
res[j,i] = residuals[0] if residuals.shape[0] > 0 else 0
d_indices = np.linspace(0, nd - 1, 5, dtype=np.int32)
fig, ax = plt.subplots()
for j in d_indices:
ax.plot(np.arange(res.shape[1]) + 2, res[j], label="d=%.03f"%(1./densities[j]))
ax.legend()
ax.set_xlabel("End index of the fitted sub-curve")
ax.set_ylabel("Fitting residual")
ax.set_title("Residuals of linear fitting for sub-curve with various densities")
Zoom on the lowest residuals:
d_indices = np.linspace(0, nd - 1, 5, dtype=np.int32)
fig, ax = plt.subplots()
for j in d_indices:
ax.plot(np.arange(res.shape[1]) + 2, res[j], label="d=%.03f"%(1./densities[j]))
ax.legend()
ax.set_xlabel("End index of the fitted sub-curve")
ax.set_ylabel("Fitting residual")
ax.set_ylim(0.0, 0.1)
ax.set_title("Residuals of linear fitting for sub-curve with various densities")
d_indices = np.linspace(0, nd - 1, 5, dtype=np.int32)
fig, ax = plt.subplots()
for j in d_indices:
ax.plot(np.arange(res.shape[1]) + 2, res[j], label="d=%.03f"%(1./densities[j]))
ax.legend()
ax.set_xlabel("End index of the fitted sub-curve")
ax.set_ylabel("Fitting residual")
ax.set_ylim(0.0, 0.01)
ax.set_title("Residuals of linear fitting for sub-curve with various densities")
There is no plateau at the beginning as I expected. But let's plot the index at which linear regression residual goes over a given threshold (say 0.05) as a function of the density.
threshold = 0.05
curve = np.zeros(res.shape[0])
for i in range(res.shape[0]):
curve[i] = np.where(res[i] < threshold)[0][-1]
curve
fig, ax = plt.subplots()
for threshold in [0.01, 0.05, 0.1]:
curve = np.zeros(res.shape[0])
for i in range(res.shape[0]):
curve[i] = np.where(res[i] < threshold)[0][-1]
ax.plot(1./densities, curve / x.shape[0] * x[-1], label="residual > %.03f" % threshold)
ax.legend()
ax.set_xlabel("Density")
ax.set_ylabel("Limit depth z")
ax.set_ylim(0, 15)
ax.set_title("Depth beyond which the linear model fails with a residual > threshold")
Still not a convincing solution for higher densities, but the exponential residuals suggest a model of the form:
$$ A\exp(f(z)) + B $$where $f \mapsto 0$ when $x \mapsto 0$
threshold = 0.01
cutoff = np.zeros(res.shape[0], dtype=np.int32)
for i in range(res.shape[0]):
cutoff[i] = np.where(res[i] < threshold)[0][-1]
coefs = np.zeros((nd, 2)) # As and Bs
for j in range(nd):
i = cutoff[j]
# fit curve between 0th and ith z value
(p, residuals, _, __, ___) = np.polyfit(x[:2+i], curves[j,:2+i], 1, full=True)
coefs[j] = p
fig, ax = plt.subplots()
ax.plot(1./densities, coefs[:,0])
ax.set_xlabel("Density d")
ax.set_title("A coefficients")
fig, ax = plt.subplots()
ax.plot(1./densities, coefs[:,1])
ax.set_xlabel("Density d")
ax.set_title("B coefficients")
Ok, let's say B = cst.
print("B = {:.04f} +- {:.04f}".format(coefs[:,1].mean(), coefs[:,1].std()))
And let's fit something on A
p, res, _, __, ___ = np.polyfit(1./densities, coefs[:,0], 2, full=True)
p, res
fig, ax = plt.subplots()
ax.plot(ds, coefs[:,0], label="ground truth")
ax.plot(ds, eval_polynomial(ds, p), label="P_2")
ax.legend()
ax.set_xlabel("Density d")
ax.set_title("A coefficients")
pA = p
B = coefs[:,1].mean()
# Now, we look for Visibility = eval_polynomial(ds, pA) * exp(f(x)) + B
# so:
# f(x) = log((Visibility - B) / eval_polynomial(ds, pA))
Trying to fit some models on f()
fig, ax = plt.subplots()
for i in range(20):
f = np.log((curves[i][1:] - B) / eval_polynomial(ds[i], pA))
ax.plot(x[1:], f, label="d=%.03f"%ds[i])
ax.legend()
ax.set_xlabel("Depth z")
ax.set_ylabel("f(z)")
fig, ax = plt.subplots()
for i in range(20):
f = np.log((curves[i][1:] - B) / eval_polynomial(ds[i], pA))
ax.plot(x[1:], 1.0/f, label="d=%.03f"%ds[i])
ax.legend(loc='right')
ax.set_xlabel("Depth z")
ax.set_ylabel("1/f(z)")
fig, ax = plt.subplots()
for i in range(20):
f = np.log((curves[i][1:] - B) / eval_polynomial(ds[i], pA))
ax.plot(x[1:], 1.0/np.exp(f), label="d=%.03f"%ds[i])
ax.legend(loc='right')
ax.set_xlabel("Depth z")
ax.set_ylabel("1/exp(f(z))")
fig, ax = plt.subplots()
for i in range(20):
f = np.log((curves[i][1:] - B) / eval_polynomial(ds[i], pA))
ax.plot(x[1:], np.exp(f), label="d=%.03f"%ds[i])
ax.legend(loc='right')
ax.set_xlabel("Depth z")
ax.set_ylabel("exp(f(z))")
fig, ax = plt.subplots()
for i in range(20):
g = (curves[i][1:])
ax.plot(x[1:], g, label="d=%.03f"%ds[i])
ax.legend(loc='right')
ax.set_xlabel("Depth z")
ax.set_ylabel("g(z)")
where $a$ is a slope and $b$ is a smoothness.
$$ \log(e^\text{PVA} - 1) = -ax-b $$fig, ax = plt.subplots()
for i in range(20):
g = np.log(np.exp(curves[i][1:]) - 1)
ax.plot(x[1:], g, label="d=%.03f"%ds[i])
ax.legend(loc='right')
ax.set_xlabel("Depth z")
ax.set_ylabel("g(z)")
coefs = np.zeros((ds.shape[0], 2))
for i in range(ds.shape[0]):
g = -np.log(np.exp(curves[i]) - 1)
p, residuals, _, __, ___ = np.polyfit(x[:50], g[:50], 1, full=True)
coefs[i,:] = p
print("#{}: residuals = {}".format(i, residuals[0]))
fig, ax = plt.subplots()
for i in range(10):
g = -np.log(np.exp(curves[2*i]) - 1)
ax.plot(x, g, label="d=%.03f"%ds[2*i], color=color(i/10))
ax.plot(x, eval_polynomial(x, coefs[2*i]), label="P_1", color=color2(1-i/10))
ax.legend(loc='right')
ax.set_xlabel("Depth z")
ax.set_ylabel("g(z)")
fig, ax = plt.subplots()
for i in range(5):
pva = curves[3*i]
ax.plot(x, pva, label="d=%.03f"%ds[3*i], color=color(i/10))
fit = np.log(1+np.exp(-eval_polynomial(x, coefs[3*i])))
ax.plot(x, fit, label="P_1", color=color2(1-i/10))
ax.legend(loc='right')
ax.set_xlabel("Depth z")
ax.set_ylabel("g(z)")
Nope, still not fitting correctly.