"""
Numba version of pyimcom_croutines.c.
Slightly slower than C, used when furry-parakeet is not installed.
Functions
---------
iD5512C_getw
Interpolation code written by Python.
iD5512C
2D, 10x10 kernel interpolation for high accuracy.
iD5512C_sym
iD5512C for symmetrical output.
gridD5512C
iD5512C for rectangular grid.
lakernel1
PyIMCOM linear algebra kernel (eigendecomposition).
lsolve_sps
Routine to solve Ax=b.
build_reduced_T_wrap
Makes coaddition matrix T from a reduced space.
"""
import numpy as np
from numba import njit
@njit
[docs]
def iD5512C_getw(w: np.array, fh: float) -> None:
"""
Interpolation code written by Python.
Parameters
----------
w : np.array
Array, shape (10,); 1D interpolation weights will be written here.
fh : float
'xfh' and 'yfh' with 1/2 subtracted.
Returns
-------
None
"""
fh2 = fh * fh
e_ = (
((+1.651881673372979740e-05 * fh2 - 3.145538007199505447e-04) * fh2 + 1.793518183780194427e-03) * fh2
- 2.904014557029917318e-03
) * fh2 + 6.187591260980151433e-04
o_ = (
(
((-3.486978652054735998e-06 * fh2 + 6.753750285320532433e-05) * fh2 - 3.871378836550175566e-04)
* fh2
+ 6.279918076641771273e-04
)
* fh2
- 1.338434614116611838e-04
) * fh
w[0] = e_ + o_
w[9] = e_ - o_
e_ = (
((-1.146756217210629335e-04 * fh2 + 2.883845374976550142e-03) * fh2 - 1.857047531896089884e-02) * fh2
+ 3.147734488597204311e-02
) * fh2 - 6.753293626461192439e-03
o_ = (
(
((+3.121412120355294799e-05 * fh2 - 8.040343683015897672e-04) * fh2 + 5.209574765466357636e-03)
* fh2
- 8.847326408846412429e-03
)
* fh2
+ 1.898674086370833597e-03
) * fh
w[1] = e_ + o_
w[8] = e_ - o_
e_ = (
((+3.256838096371517067e-04 * fh2 - 9.702063770653997568e-03) * fh2 + 8.678848026470635524e-02) * fh2
- 1.659182651092198924e-01
) * fh2 + 3.620560878249733799e-02
o_ = (
(
((-1.243658986204533102e-04 * fh2 + 3.804930695189636097e-03) * fh2 - 3.434861846914529643e-02)
* fh2
+ 6.581033749134083954e-02
)
* fh2
- 1.436476114189205733e-02
) * fh
w[2] = e_ + o_
w[7] = e_ - o_
e_ = (
((-4.541830837949564726e-04 * fh2 + 1.494862093737218955e-02) * fh2 - 1.668775957435094937e-01) * fh2
+ 5.879306056792649171e-01
) * fh2 - 1.367845996704077915e-01
o_ = (
(
((+2.894406669584551734e-04 * fh2 - 9.794291009695265532e-03) * fh2 + 1.104231510875857830e-01)
* fh2
- 3.906954914039130755e-01
)
* fh2
+ 9.092432925988773451e-02
) * fh
w[3] = e_ + o_
w[6] = e_ - o_
e_ = (
((+2.266560930061513573e-04 * fh2 - 7.815848920941316502e-03) * fh2 + 9.686607348538181506e-02) * fh2
- 4.505856722239036105e-01
) * fh2 + 6.067135256905490381e-01
o_ = (
(
((-4.336085507644610966e-04 * fh2 + 1.537862263741893339e-02) * fh2 - 1.925091434770601628e-01)
* fh2
+ 8.993141455798455697e-01
)
* fh2
- 1.213035309579723942e00
) * fh
w[4] = e_ + o_
w[5] = e_ - o_
@njit
[docs]
def iD5512C(infunc: np.array, xpos: np.array, ypos: np.array, fhatout: np.array) -> None:
"""
2D, 10x10 kernel interpolation for high accuracy
Can interpolate multiple functions at a time from the same grid
so we don't have to keep recomputing weights.
Parameters
----------
infunc : np.array
Input function on some grid. Shape (nlayer, ngy, ngx).
xpos : np.array
Input x values. Shape (nout,).
ypos : np.array
Input y values. Shape (nout,).
fhatout : np.array
Location to put the output values. Shape : (nlayer, nout).
Returns
-------
None
"""
# extract dimensions
nlayer, ngy, ngx = infunc.shape
nout = xpos.size
wx = np.zeros((10,))
wy = np.zeros((10,))
# loop over points to interpolate
for ipos in range(nout):
# frac and integer parts of abscissae
x = xpos[ipos]
y = ypos[ipos]
xi = np.int32(x)
yi = np.int32(y)
# point off the grid, don't interpolate
if xi < 4 or xi >= ngx - 5 or yi < 4 or yi >= ngy - 5:
continue
# note 'xfh' and 'yfh' have 1/2 subtracted
iD5512C_getw(wx, x - xi - 0.5)
iD5512C_getw(wy, y - yi - 0.5)
# and the outputs; Numba does not support np.einsum
for ilayer in range(nlayer):
out = 0.0
for i in range(10):
interp_vstrip = 0.0
for j in range(10):
interp_vstrip += wx[j] * infunc[ilayer, yi - 4 + i, xi - 4 + j]
out += interp_vstrip * wy[i]
fhatout[ilayer, ipos] = out
@njit
[docs]
def iD5512C_sym(infunc: np.array, xpos: np.array, ypos: np.array, fhatout: np.array) -> None:
"""
2D, 10x10 kernel interpolation for high accuracy
Can interpolate multiple functions at a time from the same grid
so we don't have to keep recomputing weights.
This version assumes the output is symmetrical as a sqrt nout x sqrt nout matrix
Parameters
----------
infunc : np.array
Input function on some grid. Shape (nlayer, ngy, ngx).
xpos : np.array
Input x values. Shape (nout,).
ypos : np.array
Input y values. Shape (nout,).
fhatout : np.array
Location to put the output values. Shape (nlayer, nout).
Returns
-------
None
"""
# extract dimensions
nlayer, ngy, ngx = infunc.shape
nout = xpos.size
sqnout = np.int32(np.sqrt(nout + 1))
wx = np.zeros((10,))
wy = np.zeros((10,))
# loop over points to interpolate, but in this function only the upper half triangle
for ipos1 in range(sqnout):
for ipos2 in range(ipos1, sqnout):
ipos = ipos1 * sqnout + ipos2
# frac and integer parts of abscissae
x = xpos[ipos]
y = ypos[ipos]
xi = np.int32(x)
yi = np.int32(y)
# point off the grid, don't interpolate
if xi < 4 or xi >= ngx - 5 or yi < 4 or yi >= ngy - 5:
continue
# note 'xfh' and 'yfh' have 1/2 subtracted
iD5512C_getw(wx, x - xi - 0.5)
iD5512C_getw(wy, y - yi - 0.5)
# and the outputs; Numba does not support np.einsum
for ilayer in range(nlayer):
out = 0.0
for i in range(10):
interp_vstrip = 0.0
for j in range(10):
interp_vstrip += wx[j] * infunc[ilayer, yi - 4 + i, xi - 4 + j]
out += interp_vstrip * wy[i]
fhatout[ilayer, ipos] = out
# ... and now fill in the lower half triangle
for ipos1 in range(1, sqnout):
for ipos2 in range(ipos1):
ipos = ipos1 * sqnout + ipos2
ipos_sym = ipos2 * sqnout + ipos1
fhatout[:, ipos] = fhatout[:, ipos_sym]
@njit
[docs]
def gridD5512C(infunc: np.array, xpos: np.array, ypos: np.array, fhatout: np.array) -> None:
"""
2D, 10x10 kernel interpolation for high accuracy
this version works with output points on a rectangular grid so that the same
weights in x and y can be used for many output points
Parameters
----------
infunc : np.array
Input function on some grid. Shape (ngy, ngx).
xpos : np.array
Input x values. Shape (npi, nxo).
ypos : np.array
Input y values. Shape (npi, nyo).
fhatout : np.array
Location to put the output values. Shape (npi, nyo*nxo).
Returns
-------
None
Notes
-----
There are npi*nyo*nxo interpolations to be done in total,
but for each input pixel npi, there is an nyo x nxo grid of output points.
If you want a 3D array (npi, nyo, nxo), you can reshape `fhatout` after it
is filled.
"""
# extract dimensions
ngy, ngx = infunc.shape
npi, nxo = xpos.shape[:2]
nyo = ypos.shape[1]
wx_ar = np.zeros((nxo, 10))
wy_ar = np.zeros((nyo, 10))
xi = np.zeros((nxo,), dtype=np.int32)
yi = np.zeros((nyo,), dtype=np.int32)
# loop over points to interpolate
for i_in in range(npi):
# get the interpolation weights -- first in x, then in y.
# do all the output points simultaneously to save time
for ix in range(nxo):
x = xpos[i_in, ix]
xi[ix] = np.int32(x)
# point off the grid, don't interpolate
if xi[ix] < 4 or xi[ix] >= ngx - 5:
xi[ix] = 4
wx_ar[ix] = 0.0
continue
iD5512C_getw(wx_ar[ix], x - xi[ix] - 0.5)
# ... and now in y
for iy in range(nyo):
y = ypos[i_in, iy]
yi[iy] = np.int32(y)
# point off the grid, don't interpolate
if yi[iy] < 4 or yi[iy] >= ngy - 5:
yi[iy] = 4
wy_ar[iy] = 0.0
continue
iD5512C_getw(wy_ar[iy], y - yi[iy] - 0.5)
# ... and now we can do the interpolation
ipos = 0
for iy in range(nyo): # output pixel row
for ix in range(nxo): # output pixel column
out = 0.0
for i in range(10):
interp_vstrip = 0.0
for j in range(10):
interp_vstrip += wx_ar[ix, j] * infunc[yi[iy] - 4 + i, xi[ix] - 4 + j]
out += interp_vstrip * wy_ar[iy, i]
fhatout[i_in, ipos] = out
ipos += 1
@njit
[docs]
def lakernel1(
lam: np.array,
Q: np.array,
mPhalf: np.array,
C: float,
targetleak: float,
kCmin: float,
kCmax: float,
nbis: int,
kappa: np.array,
Sigma: np.array,
UC: np.array,
T: np.array,
smax: float,
) -> None:
"""
PyIMCOM linear algebra kernel (eigendecomposition).
Performs all the steps with the for loops (i.e., except the matrix diagonalization).
In the parameter descriptions, "n" refers to the number of input pixels and "m"
to the number of output pixels.
Parameters
----------
lam : np.array
System matrix eigenvalues. Shape (n,).
Q : np.array
System matrix eigenvectors. Shape (n, n).
(No longer used in this version of the algorithm.)
mPhalf : np.array
-P/2 = premultiplied target overlap matrix. Shape (m, n).
C : float
Target normalization.
targetleak : float
Allowable leakage of target PSF.
kCmin, kCmax : float, float
Range of kappa/C to test.
nbis : int
Number of bisections.
kappa : np.array
Lagrange multiplier per output pixel. Shape (m,).
Writes to this array.
Sigma : np.array
Output noise amplification. Shape (m,).
Writes to this array.
UC : np.array
Fractional squared error in PSF. Shape (m,).
Writes to this array.
T : np.array
Coaddition matrix, needs to be multiplied by Q^T after the return. Shape (m,n).
Writes to this array.
smax : float
Maximum allowed noise amplification Sigma.
Returns
-------
None
"""
# dimensions
m, n = mPhalf.shape
# now loop over pixels
for a in range(m):
factor = np.sqrt(kCmax / kCmin)
kap = np.sqrt(kCmax * kCmin)
for _ in range(nbis):
sum_ = sum2 = 0.0
for i in range(n):
var = mPhalf[a, i] / (lam[i] + kap)
sum2 += var * var
sum_ += (lam[i] + 2.0 * kap) * var * var
udc = 1.0 - sum_ / C
factor = np.sqrt(factor)
kap *= 1.0 / factor if (udc > targetleak and sum2 < smax) else factor
# report final results
sum_ = sum2 = 0.0
for i in range(n):
T[a, i] = var = mPhalf[a, i] / (lam[i] + kap)
sum2 += var * var
sum_ += (lam[i] + 2.0 * kap) * var * var
Sigma[a] = sum2
kappa[a] = kap
UC[a] = 1.0 - sum_ / C
@njit
[docs]
def lsolve_sps(N: int, A: np.array, x: np.array, b: np.array) -> None:
"""
Routine to solve Ax=b.
Only the lower triangle of A is ever used (the rest need not even be allocated).
The matrix A is destroyed.
Parameters
----------
N : int
Number of 'node' eigenvalues.
A : np.array, shape : (N, N)
Positive definite matrix. Shape (`N`, `N`).
x : np.array
Vector, output. Shape (`N`,).
b : np.array
Vector. Shape (`N`,).
Returns
-------
None
"""
# Replace A with its Cholesky decomposition
for i in range(N):
for j in range(i):
sum_ = 0.0
for k in range(j):
sum_ += A[i, k] * A[j, k]
A[i, j] = (A[i, j] - sum_) / A[j, j]
sum_ = 0.0
for k in range(i):
sum_ += A[i, k] * A[i, k]
A[i, i] = np.sqrt(A[i, i] - sum_)
# ... now the lower part of A is the Cholesky decomposition L: A = LL^T
# now get p1 = LT-1 b
p1 = np.empty((N,))
for i in range(N):
sum_ = 0.0
for j in range(i):
sum_ += A[i, j] * p1[j]
p1[i] = (b[i] - sum_) / A[i, i]
# ... and x = L^-1 p1
for i in range(N - 1, -1, -1):
sum_ = 0.0
for j in range(i + 1, N):
sum_ += A[j, i] * x[j]
x[i] = (p1[i] - sum_) / A[i, i]
@njit
[docs]
def build_reduced_T_wrap(
Nflat: np.array,
Dflat: np.array,
Eflat: np.array,
kappa: np.array,
ucmin: float,
smax: float,
out_kappa: np.array,
out_Sigma: np.array,
out_UC: np.array,
out_w: np.array,
) -> None:
"""
Intermediate quantities to build coaddition matrix T from a reduced space.
In the parameter descriptions, "m" refers to the number of output pixels and
"nv" to the number of kappa nodes.
Parameters
----------
Nflat : np.array
Input noise array. Shape (m*nv*nv,). Flattened version of (m,nv,nv).
Dflat : np.array
Input 1st order signal D/C. Shape (m*nv,). Flattened version of (m,nv).
Eflat : np.array
Input 2nd order signal E/C. Shape (m*nv*nv,). Flattened version of (m,nv,nv).
kappa : np.array
List of eigenvalues, must be sorted ascending. Shape (nv,).
ucmin : float
Min U/C.
smax : float
Max Sigma (noise).
out_kappa : np.array
Output "kappa" parameter. Shape (m,).
out_Sigma : np.array
Output "Sigma". Shape (m,).
out_UC : np.array
Output "U/C". Shape (m,).
out_w : np.array
Output weights for each eigenvalue and each output pixel.
Shape (m*nv,). Flattened version of (m,nv).
Returns
-------
None
"""
# dimensions
nv = kappa.size # number of 'node' eigenvalues (must be >=2)
m = out_kappa.size # number of output pixels
nv2 = nv * nv
# allocate memory
M2d = np.empty((nv, nv))
w = np.empty((nv,))
# loop over output pixels
for a in range(m):
# first figure out the range of kappa
iv = nv - 1
UC = ucmin * 10
S = smax / 10 # do {...}
while iv > 0 and ucmin < UC and smax > S:
iv -= 1
S = Nflat[a * nv2 + iv * (nv + 1)] # diagonal noises
UC = 1.0 - 2.0 * Dflat[a * nv + iv] + Eflat[a * nv2 + iv * (nv + 1)] # diagonal U/C
# kappa should be in the range kappa[iv] .. kappa[iv+1]
kappamid = np.sqrt(kappa[iv] * kappa[iv + 1])
factor = np.power(kappa[iv + 1] / kappa[iv], 0.25)
# iterative loop to find 'best' kappa
for _ in range(12):
# build matrix for this kappa
for iv in range(nv):
for jv in range(iv + 1):
M2d[iv, jv] = Eflat[a * nv2 + iv + nv * jv] + kappamid * Nflat[a * nv2 + iv + nv * jv]
# ... and get weights
lsolve_sps(nv, M2d, w, Dflat[a * nv : (a + 1) * nv])
out_w[a * nv : (a + 1) * nv] = w # pointer to weights for this pixel
# now get the UC and the S
S = 0.0
for iv in range(nv):
sum_ = 0.0
for jv in range(nv):
sum_ += Nflat[a * nv2 + iv + nv * jv] * w[jv]
S += sum_ * w[iv]
UC = 1.0 - kappamid * S
for iv in range(nv):
UC -= Dflat[a * nv + iv] * w[iv]
# updates to kappa
kappamid *= 1.0 / factor if ucmin < UC and smax > S else factor
factor = np.sqrt(factor)
# output other information
out_kappa[a] = kappamid
out_Sigma[a] = S
out_UC[a] = UC