todos

There are 427 TODO items that need to be addressed in starry.

kepler.py:1060-1071
.. note::
Users may call the :py:meth:`draw` method of this class to draw
from the posterior after calling :py:meth:`solve`.
"""
# TODO: Implement for spectral maps?
self._no_spectral()
# Check that the data is set
if self._flux is None or self._C is None:
raise ValueError("Please provide a dataset with `set_data()`.")
# Get the full design matrix

kepler.py:1217-1227
Returns:
lnlike: The log marginal likelihood.
"""
# TODO: Implement for spectral maps?
self._no_spectral()
# Check that the data is set
if self._flux is None or self._C is None:
raise ValueError("Please provide a dataset with `set_data()`.")
# Get the full design matrix
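
For reference, the quantity returned here is the standard Gaussian marginal likelihood of a linear model. With notation chosen to mirror the snippet (flux vector f, design matrix X, data covariance C, and a Gaussian prior N(mu, Lambda) on the map coefficients — an assumption about the setup, not a statement of starry's exact internals), it reads

    \ln\mathcal{L} = -\tfrac{1}{2}\left[(f - X\mu)^\top (X\Lambda X^\top + C)^{-1}(f - X\mu) + \ln\det(X\Lambda X^\top + C) + n\ln 2\pi\right]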

maps.py:2080-2091
), "Differential rotation orders above 3 are not supported."
if drorder > 0:
assert ydeg > 0, "Differential rotation requires `ydeg` >= 1."
# TODO: phase this next warning out
logger.warning(
"Differential rotation is still an experimental feature. "
"Use it with care."
)
Ddeg = (4 * drorder + 1) * ydeg
if Ddeg >= 50:

maps.py:2099-2109
# Limb-darkened?
if (ydeg == 0) and (rv is False) and (reflected is False):
# TODO: Add support for wavelength-dependent limb darkening
if nw is not None:
raise NotImplementedError(
"Multi-wavelength limb-darkened maps are not yet supported."
)
Bases = (LimbDarkenedBase, MapBase)
else:

core.py:79-90
self._tensordotRz = tensordotRzOp(self._c_ops.tensordotRz)
self._dotR = dotROp(self._c_ops.dotR)
# Filter
# TODO: Make the filter operator sparse
self._F = FOp(self._c_ops.F, self._c_ops.N, self._c_ops.Ny)
# Differential rotation
self._tensordotD = tensordotDOp(self._c_ops.tensordotD)
# Misc
self._spotYlm = spotYlmOp(self._c_ops.spotYlm, self.ydeg, self.nw)

core.py:102-112
self._minimize = minimizeOp(
self.intensity, self.P, self.ydeg, self.udeg, self.fdeg
)
else:
# TODO: Implement minimization for spectral maps?
self._minimize = None
self._LimbDarkIsPhysical = LDPhysicalOp(_c_ops.nroots)
@property
def rT(self):
return self._rT

core.py:364-375
if self.ydeg == 0:
return M
# Rotate to the sky frame
# TODO: Do this in a single compound rotation
M = self.dotR(
self.dotR(
self.dotR(
M,
-tt.cos(obl),
-tt.sin(obl),
math.to_tensor(0.0),
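
The "single compound rotation" the TODO asks for is always possible in principle: a rotation by angle theta about a unit axis u corresponds to the quaternion

    q = \left(\cos\tfrac{\theta}{2},\; \hat{u}\sin\tfrac{\theta}{2}\right),

and successive rotations compose by quaternion multiplication, q = q_3 q_2 q_1, so the three nested dotR calls could be replaced by one rotation whose axis and angle are read off the product quaternion. (This is the general identity, not a claim about starry's sign or axis conventions.)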

core.py:465-476
-theta,
)
# Rotate to the sky frame
# TODO: Do this in a single compound rotation
MT = self.dotR(
self.dotR(
self.dotR(
MT,
math.to_tensor(1.0),
math.to_tensor(0.0),
math.to_tensor(0.0),

core.py:627-638
The method `render` requires a bunch of dummy params for
compatibility with the `System` class. This method is a
convenience method for use in the `Map` class.
"""
# TODO: There may be a bug in Theano related to
# tt.mgrid; I get different results depending on whether the
# function is compiled using `theano.function()` or if it
# is evaluated using `.eval()`. The small perturbation to `res`
# is a temporary fix that ensures that `y` and `x` are of the
# correct length in all cases I've tested.
# Compute the Cartesian grid

core.py:1453-1464
sec_veq,
keplerian,
):
"""Compute the observed system radial velocity (RV maps only)."""
# TODO: This method is currently very inefficient, as it
# calls `X` twice per call and instantiates an `orbit`
# instance up to three separate times per call. We should
# re-code the logic from `X()` in here to optimize it.
# Compute the RV filter
pri_f = self.primary.map.ops.compute_rv_filter(
pri_inc, pri_obl, pri_veq, pri_alpha

utils.py:53-64
- an integer (`int`, `np.int`, `np.int16`, `np.int32`, `np.int64`)
- a numpy boolean (`np.array(True)`, `np.array(False)`)
- a numpy float array with ndim equal to 0, 1, 2, or 3
# TODO: Cast lists to arrays and floats to np.array(float)
"""
ttype = type(arg)
if is_theano(arg):
return ttype
else:
if ttype in integers:

polybasis.py:65-74
def perform(self, node, inputs, outputs):
x, y, z, bpT = inputs
# TODO: When any of the coords are zero, there's a div
# by zero below. This hack fixes the issue. We should
# think of a better way of doing this!
tol = 1e-8
x[np.abs(x) < tol] = tol
y[np.abs(y) < tol] = tol
z[np.abs(z) < tol] = tol

basis.h:43-53
/**
Compute the `P(z)` part of the Ylm vectors.
TODO: This can be sped up with sparse algebra.
*/
template <typename Scalar>
inline void legendre(int lmax,
std::vector<std::vector<Eigen::Triplet<Scalar>>> &M) {
// Compute densely
int N = (lmax + 1) * (lmax + 1);
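
A minimal sketch of the sparse-algebra speedup the TODO has in mind, using the same Eigen triplet machinery the snippet already carries (sizes and names here are illustrative, not starry's actual interface):

    #include <Eigen/Sparse>
    #include <vector>

    // Assemble a block of P(z) directly as a sparse matrix instead of a
    // dense one, so downstream products skip the structural zeros.
    Eigen::SparseMatrix<double> assembleSparse(
        int N, const std::vector<Eigen::Triplet<double>>& triplets) {
      Eigen::SparseMatrix<double> M(N, N);
      M.setFromTriplets(triplets.begin(), triplets.end());
      return M;  // products like M * v now cost O(nnz), not O(N^2)
    }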

filter.h:247-257
Vector<Scalar> pf;
pf = B.A1_f * f;
// Multiply them
// TODO: DpDpf and DpDpu are sparse, and we should probably exploit that
Vector<Scalar> p;
if (udeg > fdeg) {
computePolynomialProduct(udeg, pu, fdeg, pf, DpDpu, DpDpf);
} else {
computePolynomialProduct(fdeg, pf, udeg, pu, DpDpf, DpDpu);
}
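
One hedged way to exploit that sparsity, sketched with Eigen's built-in pruning (treating DpDpu as a plain dense Jacobian; the real type in starry may differ):

    #include <Eigen/Dense>
    #include <Eigen/Sparse>

    // Convert the mostly-zero Jacobian to sparse storage before it is
    // chained into later matrix products; negligible entries are pruned.
    Eigen::SparseMatrix<double> pruned(const Eigen::MatrixXd& DpDpu) {
      return DpDpu.sparseView();
    }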

limbdark.h:
\file limbdark.h
\brief Limb darkening utilities from Agol, Luger & Foreman-Mackey (2019).
TODO: Loop downward in `v` until `J[v] != 0`
TODO: Test all special cases
*/
#ifndef _STARRY_LIMBDARK_H_
#define _STARRY_LIMBDARK_H_

limbdark.h:2-13
\file limbdark.h
\brief Limb darkening utilities from Agol, Luger & Foreman-Mackey (2019).
TODO: Loop downward in `v` until `J[v] != 0`
TODO: Test all special cases
*/
#ifndef _STARRY_LIMBDARK_H_
#define _STARRY_LIMBDARK_H_
#include <cmath>

misc.h:96-107
RowVector<Scalar> &bamp,
Scalar &bsigma, Scalar &blat, Scalar &blon) {
// Forward diff for sigma
// TODO: Compute the backprop expression
using ADType = ADScalar<Scalar, 1>;
ADType sigma = sigma_;
sigma.derivatives() = Vector<Scalar>::Unit(1, 0);
// Compute the integrals recursively
Vector<ADType> IP(l + 1);
Vector<ADType> ID(l + 1);
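
The forward-diff pattern used above, reduced to a self-contained sketch (the quadratic f is a stand-in for the spot integrals, not the real expression):

    #include <unsupported/Eigen/AutoDiff>

    typedef Eigen::AutoDiffScalar<Eigen::Matrix<double, 1, 1>> AD;

    // Seed sigma with a unit derivative, push it through the computation,
    // and read d f / d sigma back out of the result.
    double dfdsigma(double sigma0) {
      AD sigma(sigma0, Eigen::Matrix<double, 1, 1>::Unit(0));
      AD f = sigma * sigma + 2.0 * sigma;
      return f.derivatives()(0);  // here: 2 * sigma0 + 2
    }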

solver_reflected.h:176-186
*/
inline Scalar compute(const Scalar &bterm, const RowVector<Scalar> &brT) {
Scalar bb = 0.0;
computeHI_with_grad(bterm);
// TODO: The gradient is infinite when bterm = +/- 1
// Not sure how best to handle this.
Scalar fac = sqrt(max(Scalar(1.0) - bterm * bterm, tol));
Scalar DfacDb = -bterm / fac;
int n = 0;
int i, j;
int mu, nu;
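
The divergence the comment worries about is easy to see analytically: with fac = \sqrt{1 - b^2},

    \frac{d}{db}\sqrt{1 - b^2} = \frac{-b}{\sqrt{1 - b^2}} \longrightarrow \mp\infty \quad \text{as } b \to \pm 1,

which is why fac is clamped away from zero with tol above; the clamp bounds DfacDb but does not make the true gradient finite at the endpoints.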

wigner.h:393-404
tol_ad, D_ad, R_ad);
// Extract the matrices and their derivatives
for (int l = 0; l < ydeg + 1; ++l) {
// TODO: This data copy is *very* slow; is there a better way?
for (int i = 0; i < 2 * l + 1; ++i) {
for (int j = 0; j < 2 * l + 1; ++j) {
R[l](i, j) = R_ad[l](i, j).value();
DRDx[l](i, j) = R_ad[l](i, j).derivatives()(0);
DRDy[l](i, j) = R_ad[l](i, j).derivatives()(1);
DRDz[l](i, j) = R_ad[l](i, j).derivatives()(2);
DRDtheta[l](i, j) = R_ad[l](i, j).derivatives()(3);
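
One alternative formulation worth trying (still an elementwise copy underneath, so any speedup is an assumption to benchmark, not a promise; requires Eigen 3.3+ for type-changing functors):

    #include <Eigen/Dense>
    #include <unsupported/Eigen/AutoDiff>

    typedef Eigen::AutoDiffScalar<Eigen::Vector4d> AD;
    typedef Eigen::Matrix<AD, Eigen::Dynamic, Eigen::Dynamic> MatrixAD;

    // Peel the values out of an autodiff matrix in one fused expression
    // instead of a hand-written index loop per output matrix.
    Eigen::MatrixXd values(const MatrixAD& R_ad) {
      return R_ad.unaryExpr([](const AD& x) { return x.value(); });
    }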

wigner.h:505-516
dotR_bM.setZero(npts, Ny);
if (unlikely(npts == 0)) return;
// Dot them in
// TODO: There must be a more efficient way of doing this.
for (int l = 0; l < ydeg + 1; ++l) {
// d / dargs
dotR_bx += (M.block(0, l * l, npts, 2 * l + 1) * DRDx[l])
.cwiseProduct(bMR.block(0, l * l, npts, 2 * l + 1))
.sum();
dotR_by += (M.block(0, l * l, npts, 2 * l + 1) * DRDy[l])
.cwiseProduct(bMR.block(0, l * l, npts, 2 * l + 1))

BandTriangularSolver.h:13-24
namespace internal {
/* \internal
* Solve Ax=b with A a band triangular matrix
* TODO: extend it to matrices for x abd b */
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, int StorageOrder>
struct band_solve_triangular_selector;
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar>
struct band_solve_triangular_selector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,RowMajor>
{

PackedSelfadjointProduct.h:30-40
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat, UpLo==Lower ? size-i : (i+1)) += alpha * cj(vec[i]) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1)));
//FIXME This should be handled outside.
mat[UpLo==Lower ? 0 : i] = numext::real(mat[UpLo==Lower ? 0 : i]);
mat += UpLo==Lower ? size-i : (i+1);
}
}
};
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>

Rank2Update.h:44-55
{
Map<Matrix<Scalar,Dynamic,1> >(mat+offset, UpLo==Lower ? size-i : (i+1)) +=
numext::conj(alpha) * numext::conj(u[i]) * OtherMap(v+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1))
+ alpha * numext::conj(v[i]) * OtherMap(u+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1));
//FIXME This should be handled outside.
mat[offset+(UpLo==Lower ? 0 : i)] = numext::real(mat[offset+(UpLo==Lower ? 0 : i)]);
offset += UpLo==Lower ? size-i : (i+1);
}
}
};
} // end namespace internal

level1_cplx_impl.h:108-117
Reverse<StridedVectorType> rvx(vx);
Reverse<StridedVectorType> rvy(vy);
// TODO implement mixed real-scalar rotations
if(*incx<0 && *incy>0) internal::apply_rotation_in_the_plane(rvx, vy, JacobiRotation<Scalar>(c,s));
else if(*incx>0 && *incy<0) internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c,s));
else internal::apply_rotation_in_the_plane(vx, vy, JacobiRotation<Scalar>(c,s));
return 0;
}

level1_real_impl.h:41-51
else return 0;
}
// computes the Euclidean norm of a vector.
// FIXME
Scalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx)
{
// std::cerr << "_nrm2 " << *n << " " << *incx << "\n";
if(*n<=0) return 0;
Scalar* x = reinterpret_cast<Scalar*>(px);

level1_real_impl.h:84-95
{
Scalar* x = reinterpret_cast<Scalar*>(px);
Scalar* y = reinterpret_cast<Scalar*>(py);
// TODO
return 0;
}
// computes the modified parameters for a Givens rotation.
int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealScalar *x2, RealScalar *param)
{

level1_real_impl.h:
// computes the modified parameters for a Givens rotation.
int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealScalar *x2, RealScalar *param)
{
// TODO
return 0;
}
*/

level3_impl.h:264-274
if(*m==0 || *n==0)
return 1;
// FIXME find a way to avoid this copy
Matrix<Scalar,Dynamic,Dynamic,ColMajor> tmp = matrix(b,*m,*n,*ldb);
matrix(b,*m,*n,*ldb).setZero();
if(SIDE(*side)==LEFT)
{
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m,1,false);
func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha, blocking);

level3_impl.h:317-328
}
int size = (SIDE(*side)==LEFT) ? (*m) : (*n);
#if ISCOMPLEX
// FIXME add support for symmetric complex matrix
Matrix<Scalar,Dynamic,Dynamic,ColMajor> matA(size,size);
if(UPLO(*uplo)==UP)
{
matA.triangularView<Upper>() = matrix(a,size,size,*lda);
matA.triangularView<Lower>() = matrix(a,size,size,*lda).transpose();
}
else if(UPLO(*uplo)==LO)

level3_impl.h:406-417
if(*n==0 || *k==0)
return 0;
#if ISCOMPLEX
// FIXME add support for symmetric complex matrix
if(UPLO(*uplo)==UP)
{
if(OP(*op)==NOTR)
matrix(c, *n, *n, *ldc).triangularView<Upper>() += alpha * matrix(a,*n,*k,*lda) * matrix(a,*n,*k,*lda).transpose();
else
matrix(c, *n, *n, *ldc).triangularView<Upper>() += alpha * matrix(a,*k,*n,*lda).transpose() * matrix(a,*k,*n,*lda);
}

eigenvalues.cpp:12-22
// computes eigen values and vectors of a general N-by-N matrix A
EIGEN_LAPACK_FUNC(syev,(char *jobz, char *uplo, int* n, Scalar* a, int *lda, Scalar* w, Scalar* /*work*/, int* lwork, int *info))
{
// TODO exploit the work buffer
bool query_size = *lwork==-1;
*info = 0;
if(*jobz!='N' && *jobz!='V') *info = -1;
else if(UPLO(*uplo)==INVALID) *info = -2;
else if(*n<0) *info = -3;
else if(*lda<std::max(1,*n)) *info = -5;
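
For context, "exploit the work buffer" refers to the standard LAPACK workspace protocol, sketched here as a self-contained fragment (the size formula is the documented minimum for dsyev, used illustratively; Eigen's actual choice may differ):

    // lwork == -1 is a pure size query: deposit the optimal workspace
    // size in work[0] and return without computing anything, so a later
    // call can reuse the caller-allocated buffer.
    int syev_workspace_query(double* work, int lwork, int n) {
      if (lwork == -1) {
        work[0] = static_cast<double>(3 * n - 1);  // illustrative size only
        return 0;                                  // nothing computed
      }
      return 1;  // caller provided a real buffer; proceed with the solve
    }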

svd.cpp:13-24
// computes the singular values/vectors a general M-by-N matrix A using divide-and-conquer
EIGEN_LAPACK_FUNC(gesdd,(char *jobz, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork,
EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int * /*iwork*/, int *info))
{
// TODO exploit the work buffer
bool query_size = *lwork==-1;
int diag_size = (std::min)(*m,*n);
*info = 0;
if(*jobz!='A' && *jobz!='S' && *jobz!='O' && *jobz!='N') *info = -1;
else if(*m<0) *info = -2;
else if(*n<0) *info = -3;

svd.cpp:84-95
// computes the singular values/vectors a general M-by-N matrix A using two sided jacobi algorithm
EIGEN_LAPACK_FUNC(gesvd,(char *jobu, char *jobv, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork,
EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int *info))
{
// TODO exploit the work buffer
bool query_size = *lwork==-1;
int diag_size = (std::min)(*m,*n);
*info = 0;
if( *jobu!='A' && *jobu!='S' && *jobu!='O' && *jobu!='N') *info = -1;
else if((*jobv!='A' && *jobv!='S' && *jobv!='O' && *jobv!='N')
|| (*jobu=='O' && *jobv=='O')) *info = -2;

array.cpp:198-209
VERIFY( (m1<-a || m1>a).count() == (m1.abs()>a).count());
typedef Array<typename ArrayType::Index, Dynamic, 1> ArrayOfIndices;
// TODO allows colwise/rowwise for array
VERIFY_IS_APPROX(((m1.abs()+1)>RealScalar(0.1)).colwise().count(), ArrayOfIndices::Constant(cols,rows).transpose());
VERIFY_IS_APPROX(((m1.abs()+1)>RealScalar(0.1)).rowwise().count(), ArrayOfIndices::Constant(rows, cols));
}
template<typename ArrayType> void array_real(const ArrayType& m)
{
using std::abs;

array_for_matrix.cpp:141-152
VERIFY( ((m1.array()<-a).matrix() || (m1.array()>a).matrix()).count() == (m1.cwiseAbs().array()>a).count());
typedef Matrix<typename MatrixType::Index, Dynamic, 1> VectorOfIndices;
// TODO allows colwise/rowwise for array
VERIFY_IS_APPROX(((m1.array().abs()+1)>RealScalar(0.1)).matrix().colwise().count(), VectorOfIndices::Constant(cols,rows).transpose());
VERIFY_IS_APPROX(((m1.array().abs()+1)>RealScalar(0.1)).matrix().rowwise().count(), VectorOfIndices::Constant(rows, cols));
}
template<typename VectorType> void lpNorm(const VectorType& v)
{
using std::sqrt;

ctorleak.cpp:10-20
Foo()
{
#ifdef EIGEN_EXCEPTIONS
// TODO: Is this the correct way to handle this?
if (Foo::object_count > Foo::object_limit) { std::cout << "\nThrow!\n"; throw Foo::Fail(); }
#endif
std::cout << '+';
++Foo::object_count;
}
~Foo()

denseLM.cpp:106-116
info = lm.minimize(uv);
VERIFY_IS_EQUAL(info, 1);
//FIXME Check other parameters
return info;
}
template<typename FunctorType, typename VectorType>
int test_lmder(FunctorType& functor, VectorType& uv)
{
typedef typename VectorType::Scalar Scalar;

denseLM.cpp:119-130
LevenbergMarquardt<FunctorType> lm(functor);
info = lm.lmder1(uv);
VERIFY_IS_EQUAL(info, 1);
//FIXME Check other parameters
return info;
}
template<typename FunctorType, typename VectorType>
int test_minimizeSteps(FunctorType& functor, VectorType& uv)
{
LevenbergMarquardtSpace::Status info;

denseLM.cpp:137-148
info = lm.minimizeOneStep(uv);
} while (info==LevenbergMarquardtSpace::Running);
VERIFY_IS_EQUAL(info, 1);
//FIXME Check other parameters
return info;
}
template<typename T>
void test_denseLM_T()
{
typedef Matrix<T,Dynamic,1> VectorType;

dynalloc.cpp:82-93
};
template<typename T> void check_dynaligned()
{
// TODO have to be updated once we support multiple alignment values
if(T::SizeAtCompileTime % ALIGNMENT == 0)
{
T* obj = new T;
VERIFY(T::NeedsToAlign==1);
VERIFY(internal::UIntPtr(obj)%ALIGNMENT==0);
delete obj;
}

eigensolver_generalized_real.cpp:52-62
// non symmetric case:
{
GeneralizedEigenSolver<MatrixType> eig(rows);
// TODO enable full-prealocation of required memory, this probably requires an in-place mode for HessenbergDecomposition
//Eigen::internal::set_is_malloc_allowed(false);
eig.compute(a,b);
//Eigen::internal::set_is_malloc_allowed(true);
for(Index k=0; k<cols; ++k)
{
Matrix<ComplexScalar,Dynamic,Dynamic> tmp = (eig.betas()(k)*a).template cast<ComplexScalar>() - eig.alphas()(k)*b;
if(tmp.size()>1 && tmp.norm()>(std::numeric_limits<Scalar>::min)())

eigensolver_selfadjoint.cpp:149-159
VERIFY_IS_APPROX(tridiag.diagonal(), tridiag.matrixT().diagonal());
VERIFY_IS_APPROX(tridiag.subDiagonal(), tridiag.matrixT().template diagonal<-1>());
Matrix<RealScalar,Dynamic,Dynamic> T = tridiag.matrixT();
if(rows>1 && cols>1) {
// FIXME check that upper and lower part are 0:
//VERIFY(T.topRightCorner(rows-2, cols-2).template triangularView<Upper>().isZero());
}
VERIFY_IS_APPROX(tridiag.diagonal(), T.diagonal());
VERIFY_IS_APPROX(tridiag.subDiagonal(), T.template diagonal<1>());
VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView<Lower>()), tridiag.matrixQ() * tridiag.matrixT().eval() * MatrixType(tridiag.matrixQ()).adjoint());
VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView<Lower>()), tridiag.matrixQ() * tridiag.matrixT() * tridiag.matrixQ().adjoint());

evaluators.cpp:260-271
// test direct traversal
Matrix3f m3;
Array33f a3;
VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity()); // matrix, nullary
// TODO: find a way to test direct traversal with array
VERIFY_IS_APPROX_EVALUATOR(m3.transpose(), Matrix3f::Identity().transpose()); // transpose
VERIFY_IS_APPROX_EVALUATOR(m3, 2 * Matrix3f::Identity()); // unary
VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity() + Matrix3f::Zero()); // binary
VERIFY_IS_APPROX_EVALUATOR(m3.block(0,0,2,2), Matrix3f::Identity().block(1,1,2,2)); // block
// test linear traversal
VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Zero()); // matrix, nullary

evaluators.cpp:279-290
Array44f a4, a4src = Matrix4f::Random();
VERIFY_IS_APPROX_EVALUATOR(m4, m4src); // matrix
VERIFY_IS_APPROX_EVALUATOR(a4, a4src); // array
VERIFY_IS_APPROX_EVALUATOR(m4.transpose(), m4src.transpose()); // transpose
// TODO: find out why Matrix4f::Zero() does not allow inner vectorization
VERIFY_IS_APPROX_EVALUATOR(m4, 2 * m4src); // unary
VERIFY_IS_APPROX_EVALUATOR(m4, m4src + m4src); // binary
// test linear vectorization
MatrixXf mX(6,6), mXsrc = MatrixXf::Random(6,6);
ArrayXXf aX(6,6), aXsrc = ArrayXXf::Random(6,6);
VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc); // matrix

geo_quaternion.cpp:239-250
const Quaternionx& cq3(q3);
VERIFY( &cq3.x() == &q3.x() );
const MQuaternionUA& cmq3(mq3);
VERIFY( &cmq3.x() == &mq3.x() );
// FIXME the following should be ok. The problem is that currently the LValueBit flag
// is used to determine wether we can return a coeff by reference or not, which is not enough for Map<const ...>.
//const MCQuaternionUA& cmcq3(mcq3);
//VERIFY( &cmcq3.x() == &mcq3.x() );
}
template<typename Scalar> void quaternionAlignment(void){
typedef Quaternion<Scalar,AutoAlign> QuaternionA;

geo_transformations.cpp:165-176
VERIFY_IS_APPROX(AngleAxisx(m).toRotationMatrix(),
Quaternionx(m).toRotationMatrix());
// Transform
// TODO complete the tests !
a = 0;
while (abs(a)<Scalar(0.1))
a = internal::random<Scalar>(-Scalar(0.4)*Scalar(EIGEN_PI), Scalar(0.4)*Scalar(EIGEN_PI));
q1 = AngleAxisx(a, v0.normalized());
Transform3 t0, t1, t2;
// first test setIdentity() and Identity()

hessenberg.cpp:45-56
VERIFY_RAISES_ASSERT( hessUninitialized.matrixQ() );
VERIFY_RAISES_ASSERT( hessUninitialized.householderCoefficients() );
VERIFY_RAISES_ASSERT( hessUninitialized.packedMatrix() );
// TODO: Add tests for packedMatrix() and householderCoefficients()
}
void test_hessenberg()
{
CALL_SUBTEST_1(( hessenberg<std::complex<double>,1>() ));
CALL_SUBTEST_2(( hessenberg<std::complex<double>,2>() ));
CALL_SUBTEST_3(( hessenberg<std::complex<float>,4>() ));

nomalloc.cpp:148-159
Eigen::ColPivHouseholderQR<Matrix> cpQR; cpQR.compute(A);
X = cpQR.solve(B);
x = cpQR.solve(b);
Eigen::FullPivHouseholderQR<Matrix> fpQR; fpQR.compute(A);
// FIXME X = fpQR.solve(B);
x = fpQR.solve(b);
// SVD module
Eigen::JacobiSVD<Matrix> jSVD; jSVD.compute(A, ComputeFullU | ComputeFullV);
}
void test_zerosized() {

prec_inverse_4x4.cpp:48-59
std::cerr << "inverse_general_4x4, Scalar = " << type_name<Scalar>() << std::endl;
double error_avg = error_sum / repeat;
EIGEN_DEBUG_VAR(error_avg);
EIGEN_DEBUG_VAR(error_max);
// FIXME that 1.25 used to be a 1.0 until the NumTraits changes on 28 April 2010, what's going wrong??
// FIXME that 1.25 used to be 1.2 until we tested gcc 4.1 on 30 June 2010 and got 1.21.
VERIFY(error_avg < (NumTraits<Scalar>::IsComplex ? 8.0 : 1.25));
VERIFY(error_max < (NumTraits<Scalar>::IsComplex ? 64.0 : 20.0));
{
int s = 5;//internal::random<int>(4,10);
int i = 0;//internal::random<int>(0,s-4);

prec_inverse_4x4.cpp:49-60
double error_avg = error_sum / repeat;
EIGEN_DEBUG_VAR(error_avg);
EIGEN_DEBUG_VAR(error_max);
// FIXME that 1.25 used to be a 1.0 until the NumTraits changes on 28 April 2010, what's going wrong??
// FIXME that 1.25 used to be 1.2 until we tested gcc 4.1 on 30 June 2010 and got 1.21.
VERIFY(error_avg < (NumTraits<Scalar>::IsComplex ? 8.0 : 1.25));
VERIFY(error_max < (NumTraits<Scalar>::IsComplex ? 64.0 : 20.0));
{
int s = 5;//internal::random<int>(4,10);
int i = 0;//internal::random<int>(0,s-4);
int j = 0;//internal::random<int>(0,s-4);

product_large.cpp:59-70
}
{
// check the functions to setup blocking sizes compile and do not segfault
// FIXME check they do what they are supposed to do !!
std::ptrdiff_t l1 = internal::random<int>(10000,20000);
std::ptrdiff_t l2 = internal::random<int>(100000,200000);
std::ptrdiff_t l3 = internal::random<int>(1000000,2000000);
setCpuCacheSizes(l1,l2,l3);
VERIFY(l1==l1CacheSize());
VERIFY(l2==l2CacheSize());
std::ptrdiff_t k1 = internal::random<int>(10,100)*16;

product_trmm.cpp:76-87
VERIFY_IS_APPROX( ge_xs = (s1*mat).adjoint().template triangularView<Mode>() * ge_left.adjoint(), numext::conj(s1) * triTr.conjugate() * ge_left.adjoint());
VERIFY_IS_APPROX( ge_xs = (s1*mat).transpose().template triangularView<Mode>() * ge_left.adjoint(), s1triTr * ge_left.adjoint());
// TODO check with sub-matrix expressions ?
}
template<typename Scalar, int Mode, int TriOrder>
void trmv(int rows=get_random_size<Scalar>(), int cols=get_random_size<Scalar>())
{
trmm<Scalar,Mode,TriOrder,ColMajor,ColMajor,1>(rows,cols,1);
}

product_trmv.cpp:67-78
VERIFY((v1.transpose() * m3).isApprox(v1.transpose() * m1.template triangularView<Eigen::Lower>(), largerEps));
VERIFY((v1.adjoint() * m3).isApprox(v1.adjoint() * m1.template triangularView<Eigen::Lower>(), largerEps));
VERIFY((v1.adjoint() * m3.adjoint()).isApprox(v1.adjoint() * m1.template triangularView<Eigen::Lower>().adjoint(), largerEps));
// TODO check with sub-matrices
}
void test_product_trmv()
{
int s = 0;
for(int i = 0; i < g_repeat ; i++) {
CALL_SUBTEST_1( trmv(Matrix<float, 1, 1>()) );

qr.cpp:37-48
Matrix<Scalar,Rows,Cols> m1 = Matrix<Scalar,Rows,Cols>::Random();
HouseholderQR<Matrix<Scalar,Rows,Cols> > qr(m1);
Matrix<Scalar,Rows,Cols> r = qr.matrixQR();
// FIXME need better way to construct trapezoid
for(int i = 0; i < Rows; i++) for(int j = 0; j < Cols; j++) if(i>j) r(i,j) = Scalar(0);
VERIFY_IS_APPROX(m1, qr.householderQ() * r);
Matrix<Scalar,Cols,Cols2> m2 = Matrix<Scalar,Cols,Cols2>::Random(Cols,Cols2);
Matrix<Scalar,Rows,Cols2> m3 = m1*m2;
m2 = Matrix<Scalar,Cols,Cols2>::Random(Cols,Cols2);

qr_fullpivoting.cpp:37-47
MatrixQType q = qr.matrixQ();
VERIFY_IS_UNITARY(q);
// FIXME need better way to construct trapezoid
for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) if(i>j) r(i,j) = Scalar(0);
MatrixType c = qr.matrixQ() * r * qr.colsPermutation().inverse();
VERIFY_IS_APPROX(m1, c);
// stress the ReturnByValue mechanism

qr_fullpivoting.cpp:128-138
void test_qr_fullpivoting()
{
for(int i = 0; i < 1; i++) {
// FIXME : very weird bug here
// CALL_SUBTEST(qr(Matrix2f()) );
CALL_SUBTEST_1( qr<MatrixXf>() );
CALL_SUBTEST_2( qr<MatrixXd>() );
CALL_SUBTEST_3( qr<MatrixXcd>() );
}
for(int i = 0; i < g_repeat; i++) {

real_qz.cpp:42-53
break;
}
RealQZ<MatrixType> qz(dim);
// TODO enable full-prealocation of required memory, this probably requires an in-place mode for HessenbergDecomposition
//Eigen::internal::set_is_malloc_allowed(false);
qz.compute(A,B);
//Eigen::internal::set_is_malloc_allowed(true);
VERIFY_IS_EQUAL(qz.info(), Success);
// check for zeros
bool all_zeros = true;

sparse.h:76-87
std::swap(ai,aj);
Scalar v = (internal::random<double>(0,1) < density) ? internal::random<Scalar>() : Scalar(0);
if ((flags&ForceNonZeroDiag) && (i==j))
{
// FIXME: the following is too conservative
v = internal::random<Scalar>()*Scalar(3.);
v = v*v;
if(numext::real(v)>0) v += Scalar(5);
else v -= Scalar(5);
}
if ((flags & MakeLowerTriangular) && aj>ai)
v = Scalar(0);

sparse_product.cpp:286-297
mLo = mUp.adjoint();
refS = refUp + refLo;
refS.diagonal() *= 0.5;
mS = mUp + mLo;
// TODO be able to address the diagonal....
for (int k=0; k<mS.outerSize(); ++k)
for (typename SparseMatrixType::InnerIterator it(mS,k); it; ++it)
if (it.index() == k)
it.valueRef() *= Scalar(0.5);
VERIFY_IS_APPROX(refS.adjoint(), refS);
VERIFY_IS_APPROX(mS.adjoint(), mS);

tensor_benchmarks.h:12-22
using Eigen::Tensor;
using Eigen::TensorMap;
// TODO(bsteiner): also templatize on the input type since we have users
// for int8 as well as floats.
template <typename Device, typename T> class BenchmarkSuite {
public:
BenchmarkSuite(const Device& device, size_t m, size_t k, size_t n)
: m_(m), k_(k), n_(n), device_(device) {
initialize();
}

camera.cpp:100-111
}
void Camera::setDirection(const Vector3f& newDirection)
{
// TODO implement it computing the rotation between newDirection and current dir ?
Vector3f up = this->up();
Matrix3f camAxes;
camAxes.col(2) = (-newDirection).normalized();
camAxes.col(0) = up.cross( camAxes.col(2) ).normalized();
camAxes.col(1) = camAxes.col(2).cross( camAxes.col(0) ).normalized();
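
A minimal sketch of what the TODO proposes, using Eigen's shortest-arc helper (the parameter names mirror Camera loosely and are assumptions):

    #include <Eigen/Geometry>

    // Rotate the current orientation by the rotation carrying the old
    // viewing direction onto the new one, instead of rebuilding the
    // camera axes from scratch.
    void rotateToDirection(Eigen::Quaternionf& orientation,
                           const Eigen::Vector3f& oldDir,
                           const Eigen::Vector3f& newDir) {
      const Eigen::Quaternionf delta =
          Eigen::Quaternionf::FromTwoVectors(oldDir, newDir);
      orientation = delta * orientation;
    }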

camera.cpp:257-264
Vector3f a(2.*uv.x()/float(mVpWidth)-1., 2.*uv.y()/float(mVpHeight)-1., 1.);
a.x() *= depth/mProjectionMatrix(0,0);
a.y() *= depth/mProjectionMatrix(1,1);
a.z() = -depth;
// FIXME /\/|
Vector4f b = invModelview * Vector4f(a.x(), a.y(), a.z(), 1.);
return Vector3f(b.x(), b.y(), b.z());
}

MatrixBase_triangularView.cpp:
cout << "Here is the strictly-upper-triangular matrix extracted from m:" << endl
<< Matrix3i(m.triangularView<Eigen::StrictlyUpper>()) << endl;
cout << "Here is the unit-lower-triangular matrix extracted from m:" << endl
<< Matrix3i(m.triangularView<Eigen::UnitLower>()) << endl;
// FIXME need to implement output for triangularViews (Bug 885)

AlignedVector3:33-44
* the same result can be achieved by directly using a 4D vector.
* This class makes this process simpler.
*
*/
// TODO specialize Cwise
template<typename _Scalar> class AlignedVector3;
namespace internal {
template<typename _Scalar> struct traits<AlignedVector3<_Scalar> >
: traits<Matrix<_Scalar,3,1,0,4,1> >
{
};

FFT:76-87
//template <typename T> typedef struct internal::fftw_impl default_fft_impl; this does not work
template <typename T> struct default_fft_impl : public internal::fftw_impl<T> {};
}
#elif defined EIGEN_MKL_DEFAULT
// TODO
// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form
# include "src/FFT/ei_imklfft_impl.h"
namespace Eigen {
template <typename T> struct default_fft_impl : public internal::imklfft_impl {};
}
#else
// internal::kissfft_impl: small, free, reasonably efficient default, derived from kissfft

FFT:355-366
}
/*
// TODO: multi-dimensional FFTs
inline
void inv2(Complex * dst, const Complex * src, int n0,int n1)
{
m_impl.inv2(dst,src,n0,n1);
if ( HasFlag( Unscaled ) == false)
scale(dst,1./(n0*n1),n0*n1);
}

NonLinearOptimization.cpp:235-244
MatrixXd cov;
cov = covfac*lm.fjac.topLeftCorner<n,n>();
VERIFY_IS_APPROX( cov, cov_ref);
// TODO: why isn't this allowed ? :
// VERIFY_IS_APPROX( covfac*fjac.topLeftCorner<n,n>() , cov_ref);
}
struct hybrj_functor : Functor<double>
{
hybrj_functor(void) : Functor<double>(9,9) {}

NonLinearOptimization.cpp:626-636
MatrixXd cov;
cov = covfac*lm.fjac.topLeftCorner<n,n>();
VERIFY_IS_APPROX( cov, cov_ref);
// TODO: why isn't this allowed ? :
// VERIFY_IS_APPROX( covfac*fjac.topLeftCorner<n,n>() , cov_ref);
}
struct chwirut2_functor : Functor<double>
{
chwirut2_functor(void) : Functor<double>(3,54) {}
static const double m_x[54];

autodiff.cpp:210-221
VERIFY_IS_APPROX(y, yref);
VERIFY_IS_APPROX(j, jref);
}
// TODO also check actual derivatives!
template <int>
void test_autodiff_scalar()
{
Vector2f p = Vector2f::Random();
typedef AutoDiffScalar<Vector2f> AD;
AD ax(p.x(),Vector2f::UnitX());
AD ay(p.y(),Vector2f::UnitY());

autodiff.cpp:223-234
VERIFY_IS_APPROX(res.value(), foo(p.x(),p.y()));
}
// TODO also check actual derivatives!
template <int>
void test_autodiff_vector()
{
Vector2f p = Vector2f::Random();
typedef AutoDiffScalar<Vector2f> AD;
typedef Matrix<AD,2,1> VectorAD;
VectorAD ap = p.cast<AD>();

autodiff_scalar.cpp:11-21
#include <unsupported/Eigen/AutoDiff>
/*
* In this file scalar derivations are tested for correctness.
* TODO add more tests!
*/
template<typename Scalar> void check_atan2()
{
typedef Matrix<Scalar, 1, 1> Deriv1;
typedef AutoDiffScalar<Deriv1> AD;

cxx11_tensor_concatenation.cpp:99-110
}
}
// TODO(phli): Add test once we have a real vectorized implementation.
// static void test_vectorized_concatenation() {}
static void test_concatenation_as_lvalue()
{
Tensor<int, 2> t1(2, 3);
Tensor<int, 2> t2(2, 3);
t1.setRandom();

cxx11_tensor_random.cpp:15-26
{
Tensor<float, 1> vec(6);
vec.setRandom();
// Fixme: we should check that the generated numbers follow a uniform
// distribution instead.
for (int i = 1; i < 6; ++i) {
VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1));
}
}
static void test_normal()

cxx11_tensor_random.cpp:27-36
{
Tensor<float, 1> vec(6);
vec.setRandom<Eigen::internal::NormalRandomGenerator<float>>();
// Fixme: we should check that the generated numbers follow a gaussian
// distribution instead.
for (int i = 1; i < 6; ++i) {
VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1));
}
}

levenberg_marquardt.cpp:8-18
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// FIXME: These tests all check for hard-coded values. Ideally, parameters and start estimates should be randomized.
#include <stdio.h>
#include "main.h"
#include <unsupported/Eigen/LevenbergMarquardt>

levenberg_marquardt.cpp:133-143
MatrixXd cov;
cov = covfac*lm.matrixR().topLeftCorner<n,n>();
VERIFY_IS_APPROX( cov, cov_ref);
// TODO: why isn't this allowed ? :
// VERIFY_IS_APPROX( covfac*fjac.topLeftCorner<n,n>() , cov_ref);
}
struct lmdif_functor : DenseFunctor<double>
{
lmdif_functor(void) : DenseFunctor<double>(3,15) {}
int operator()(const VectorXd &x, VectorXd &fvec) const

levenberg_marquardt.cpp:236-246
MatrixXd cov;
cov = covfac*lm.matrixR().topLeftCorner<n,n>();
VERIFY_IS_APPROX( cov, cov_ref);
// TODO: why isn't this allowed ? :
// VERIFY_IS_APPROX( covfac*fjac.topLeftCorner<n,n>() , cov_ref);
}
struct chwirut2_functor : DenseFunctor<double>
{
chwirut2_functor(void) : DenseFunctor<double>(3,54) {}
static const double m_x[54];

LDLT.h:224-234
LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);
/** \returns the internal LDLT decomposition matrix
*
* TODO: document the storage layout
*/
inline const MatrixType& matrixLDLT() const
{
eigen_assert(m_isInitialized && "LDLT is not initialized.");
return m_matrix;
}

LDLT.h:499-510
m_matrix = a.derived();
// Compute matrix L1 norm = max abs column sum.
m_l1_norm = RealScalar(0);
// TODO move this code to SelfAdjointView
for (Index col = 0; col < size; ++col) {
RealScalar abs_col_sum;
if (_UpLo == Lower)
abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
else
abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
if (abs_col_sum > m_l1_norm)
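
What the loop computes is the induced 1-norm (max absolute column sum), reassembled from the one stored triangle of the self-adjoint matrix; for a fully stored matrix the same quantity is a one-liner, which is roughly what moving the code into SelfAdjointView would encapsulate:

    #include <Eigen/Dense>

    // Induced L1 norm of a fully stored matrix: max abs column sum.
    double l1Norm(const Eigen::MatrixXd& m) {
      return m.cwiseAbs().colwise().sum().maxCoeff();
    }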

LLT.h:166-176
}
/** \returns the LLT decomposition matrix
*
* TODO: document the storage layout
*/
inline const MatrixType& matrixLLT() const
{
eigen_assert(m_isInitialized && "LLT is not initialized.");
return m_matrix;
}

LLT.h:433-444
m_matrix = a.derived();
// Compute matrix L1 norm = max abs column sum.
m_l1_norm = RealScalar(0);
// TODO move this code to SelfAdjointView
for (Index col = 0; col < size; ++col) {
RealScalar abs_col_sum;
if (_UpLo == Lower)
abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
else
abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
if (abs_col_sum > m_l1_norm)

CholmodSupport.h:298-309
{
this->m_info = NumericalIssue;
return;
}
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());
cholmod_free_dense(&x_cd, &m_cholmod);
}
/** \internal */
template<typename RhsDerived, typename DestDerived>
void _solve_impl(const SparseMatrixBase<RhsDerived> &b, SparseMatrixBase<DestDerived> &dest) const

CholmodSupport.h:321-332
{
this->m_info = NumericalIssue;
return;
}
// TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
dest.derived() = viewAsEigen<typename DestDerived::Scalar,ColMajor,typename DestDerived::StorageIndex>(*x_cs);
cholmod_free_sparse(&x_cs, &m_cholmod);
}
#endif // EIGEN_PARSED_BY_DOXYGEN
/** Sets the shift parameter that will be used to adjust the diagonal coefficients during the numerical factorization.

Array.h:135-146
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
// FIXME is it still needed ??
/** \internal */
EIGEN_DEVICE_FUNC
Array(internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert())
{
Base::_check_template_params();
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED

AssignEvaluator.h:54-65
OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
MaxSizeAtCompileTime = Dst::SizeAtCompileTime
};
// TODO distinguish between linear traversal and inner-traversals
typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
enum {
LinearPacketSize = unpacket_traits<LinearPacketType>::size,
InnerPacketSize = unpacket_traits<InnerPacketType>::size
};

AssignEvaluator.h:191-201
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime
};

AssignEvaluator.h:256-266
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime,

AssignEvaluator.h:372-383
struct unaligned_dense_assignment_loop<false>
{
// MSVC must not inline this functions. If it does, it fails to optimize the
// packet access path.
// FIXME check which version exhibits this issue
#if EIGEN_COMP_MSVC
template <typename Kernel>
static EIGEN_DONT_INLINE void run(Kernel &kernel,
Index start,
Index end)
#else
template <typename Kernel>

AssignEvaluator.h:692-702
protected:
DstEvaluatorType& m_dst;
const SrcEvaluatorType& m_src;
const Functor &m_functor;
// TODO find a way to avoid the needs of the original expression
DstXprType& m_dstExpr;
};
/***************************************************************************
* Part 5 : Entry point for dense rectangular assignment
***************************************************************************/

AssignEvaluator.h:827-838
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
ActualDstType actualDst(dst);
// TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
template<typename Dst, typename Src>

AssignEvaluator.h:845-856
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
// TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
Assignment<Dst,Src,Func>::run(dst, src, func);
}
template<typename Dst, typename Src>

AssignEvaluator.h:880-891
}
};
// Generic assignment through evalTo.
// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
// Note that the last template argument "Weak" is needed to make it possible to perform
// both partial specialization+SFINAE without ambiguous specialization
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)

Block.h:46-57
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(outer_stride_at_compile_time<XprType>::ret)
: int(inner_stride_at_compile_time<XprType>::ret),
// FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
// FIXME DirectAccessBit should not be handled by expressions
//
// Alignment is needed by MapBase's assertions
// We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator

Block.h:50-60
// FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
// FIXME DirectAccessBit should not be handled by expressions
//
// Alignment is needed by MapBase's assertions
// We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
Alignment = 0
};
};

Block.h:414-425
return m_startCol.value();
}
#ifndef __SUNPRO_CC
// FIXME sunstudio is not friendly with the above friend...
// META-FIXME there is no 'friend' keyword around here. Is this obsolete?
protected:
#endif
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal used by allowAligned() */
EIGEN_DEVICE_FUNC

CommaInitializer.h:45-56
}
/* Copy/Move constructor which transfers ownership. This is crucial in
* absence of return value optimization to avoid assertions during destruction. */
// FIXME in C++11 mode this could be replaced by a proper RValue constructor
EIGEN_DEVICE_FUNC
inline CommaInitializer(const CommaInitializer& o)
: m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
// Mark original object as finished. In absence of R-value references we need to const_cast:
const_cast<CommaInitializer&>(o).m_row = m_xpr.rows();
const_cast<CommaInitializer&>(o).m_col = m_xpr.cols();
const_cast<CommaInitializer&>(o).m_currentBlockRows = 0;
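
A hedged sketch of the C++11 replacement the FIXME suggests — a move constructor that transfers ownership without the const_cast (member names copied from the snippet; the surrounding class is assumed):

    // Transfers ownership from a genuinely mutable rvalue, so the
    // moved-from initializer's destructor assertion cannot fire.
    CommaInitializer(CommaInitializer&& o)
      : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col),
        m_currentBlockRows(o.m_currentBlockRows) {
      o.m_row = m_xpr.rows();
      o.m_col = m_xpr.cols();
      o.m_currentBlockRows = 0;
    }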

CoreEvaluators.h:93-104
EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {}
};
// TODO: Think about const-correctness
template<typename T>
struct evaluator<const T>
: evaluator<T>
{
EIGEN_DEVICE_FUNC
explicit evaluator(const T& xpr) : evaluator<T>(xpr) {}
};

CoreEvaluators.h:107-116
template<typename ExpressionType>
struct evaluator_base : public noncopyable
{
// TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
typedef traits<ExpressionType> ExpressionTraits;
enum {
Alignment = 0
};
};

CoreEvaluators.h:798-809
};
// -------------------- Map --------------------
// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ?
// but that might complicate template specialization
template<typename Derived, typename PlainObjectType>
struct mapbase_evaluator;
template<typename Derived, typename PlainObjectType>
struct mapbase_evaluator : evaluator_base<Derived>
{

CoreEvaluators.h:897-908
: public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType>
{
typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
typedef typename XprType::Scalar Scalar;
// TODO: should check for smaller packet types once we can handle multi-sized packet types
typedef typename packet_traits<Scalar>::type PacketScalar;
enum {
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
? int(PlainObjectType::InnerStrideAtCompileTime)
: int(StrideType::InnerStrideAtCompileTime),
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0

CoreEvaluators.h:953-964
: block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
typedef typename XprType::Scalar Scalar;
// TODO: should check for smaller packet types once we can handle multi-sized packet types
typedef typename packet_traits<Scalar>::type PacketScalar;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
RowsAtCompileTime = traits<XprType>::RowsAtCompileTime,
ColsAtCompileTime = traits<XprType>::ColsAtCompileTime,

CoreEvaluators.h:1105-1116
const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
const variable_if_dynamic<Index, InnerPanel ? Dynamic : 0> m_linear_offset;
};
// TODO: This evaluator does not actually use the child evaluator;
// all action is via the data() as returned by the Block expression.
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true>
: mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>,
typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
{

CoreEvaluators.h:1119-1129
EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
{
// TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime
eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
}
};
// -------------------- Select --------------------
// NOTE shall we introduce a ternary_evaluator?

CoreEvaluators.h:1128-1138
// -------------------- Select --------------------
// NOTE shall we introduce a ternary_evaluator?
// TODO enable vectorization for Select
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
: evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
{
typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
enum {
CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost

CoreEvaluators.h:1448-1457
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
// let's enable LinearAccess only with vectorization because of the product overhead
// FIXME enable DirectAccess with negative strides?
Flags0 = evaluator<ArgType>::Flags,
LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
|| ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1))
? LinearAccessBit : 0,
Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),

CoreEvaluators.h:1518-1529
template<int LoadMode, typename PacketType>
EIGEN_STRONG_INLINE
void writePacket(Index row, Index col, const PacketType& x)
{
// FIXME we could factorize some code with packet(i,j)
enum {
PacketSize = unpacket_traits<PacketType>::size,
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
};
typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
m_argImpl.template writePacket<LoadMode>(

CoreIterators.h:26-37
* \brief An InnerIterator allows to loop over the element of any matrix expression.
*
* \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed.
*
* TODO: add a usage example
*/
template<typename XprType>
class InnerIterator
{
protected:
typedef internal::inner_iterator_selector<XprType, typename internal::evaluator_traits<XprType>::Kind> IteratorType;
typedef internal::evaluator<XprType> EvaluatorType;
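
The usage example the TODO asks for might look like the long-standing sparse-matrix pattern, which the generic class in this header mirrors:

    #include <Eigen/Sparse>
    #include <iostream>

    // Visit only the stored entries, one outer slice (column, here) at a time.
    void printEntries(const Eigen::SparseMatrix<double>& m) {
      for (int k = 0; k < m.outerSize(); ++k)
        for (Eigen::SparseMatrix<double>::InnerIterator it(m, k); it; ++it)
          std::cout << "(" << it.row() << ", " << it.col() << ") = "
                    << it.value() << "\n";
    }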

DenseCoeffsBase.h:508-519
{
return derived().outerStride();
}
// FIXME shall we remove it ?
inline Index stride() const
{
return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
}
/** \returns the pointer increment between two consecutive rows.
*

DenseCoeffsBase.h:582-593
{
return derived().outerStride();
}
// FIXME shall we remove it ?
inline Index stride() const
{
return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
}
/** \returns the pointer increment between two consecutive rows.
*

EigenBase.h:36-47
* \sa StorageIndex, \ref TopicPreprocessorDirectives.
*/
typedef Eigen::Index Index;
// FIXME is it needed?
typedef typename internal::traits<Derived>::StorageKind StorageKind;
/** \returns a reference to the derived object */
EIGEN_DEVICE_FUNC
Derived& derived() { return *static_cast<Derived*>(this); }
/** \returns a const reference to the derived object */
EIGEN_DEVICE_FUNC

GeneralProduct.h:84-94
/* The following allows to select the kind of product at compile time
* based on the three dimensions of the product.
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
// FIXME I'm not sure the current mapping is the ideal one.
template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; };
template<int M> struct product_type_selector<M, 1, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<int N> struct product_type_selector<1, N, 1> { enum { ret = LazyCoeffBasedProductMode }; };
template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; };
template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; };
template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; };

GeneralProduct.h:116-127
/***********************************************************************
* Implementation of Inner Vector Vector Product
***********************************************************************/
// FIXME : maybe the "inner product" could return a Scalar
// instead of a 1x1 matrix ??
// Pro: more natural for the user
// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
// product ends up to a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
/***********************************************************************

GeneralProduct.h:224-235
// make sure Dest is a compile-time vector type (bug 1166)
typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
};
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;

GeneralProduct.h:313-324
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
* RhsBlasTraits::extractScalarFactor(rhs);
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
};
gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),

GeneralProduct.h:350-360
template<typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
// TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
typename nested_eval<Rhs,1>::type actual_rhs(rhs);
const Index size = rhs.rows();
for(Index k=0; k<size; ++k)
dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
}
};

GenericPacketMath.h:350-360
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
{
// FIXME: uncomment the following in case we drop the internal imag and real functions.
// using std::imag;
// using std::real;
return Packet(imag(a),real(a));
}
/**************************
* Special math functions

GlobalFunctions.h:181-187
EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2,scalar_abs2_op)
}
}
// TODO: cleanly disable those functions that are not supported on Array (numext::real_ref, internal::random, internal::isApprox...)
#endif // EIGEN_GLOBAL_FUNCTIONS_H

IO.h:56-67
const std::string& _matPrefix="", const std::string& _matSuffix="")
: matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator),
rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
{
// TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
// don't add rowSpacer if columns are not to be aligned
if((flags & DontAlignCols))
return;
int i = int(matSuffix.length())-1;
while (i>=0 && matSuffix[i]!='\n')
{
rowSpacer += ' ';

MathFunctions.h:10-21
#ifndef EIGEN_MATHFUNCTIONS_H
#define EIGEN_MATHFUNCTIONS_H
// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html
// TODO this should better be moved to NumTraits
#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
namespace Eigen {
// On WINCE, std::abs is defined for int only, so let's defined our own overloads:
// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too.

Matrix.h:44-55
Options = _Options,
InnerStrideAtCompileTime = 1,
OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
// FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
Alignment = actual_alignment
};
};
}
/** \class Matrix

Matrix.h:261-272
Base::_check_template_params();
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
// FIXME is it still needed
EIGEN_DEVICE_FUNC
explicit Matrix(internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert())
{ Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
#if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC

MatrixBase.h:297-308
{ return cwiseNotEqual(other).any(); }
NoAlias<Derived,Eigen::MatrixBase > noalias();
// TODO forceAlignedAccess is temporarily disabled
// Need to find a nicer workaround.
inline const Derived& forceAlignedAccess() const { return derived(); }
inline Derived& forceAlignedAccess() { return derived(); }
template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
EIGEN_DEVICE_FUNC Scalar trace() const;
|
PlainObjectBase.h:
487
488
489
490
491
492
493
494
495
496
497
498 | // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
// FIXME is it still needed ?
/** \internal */
EIGEN_DEVICE_FUNC
explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
: m_storage(internal::constructor_without_unaligned_array_assert())
{
// _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
|
PlainObjectBase.h:
552
553
554
555
556
557
558
559
560
561
562
563 | EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)
{
_check_template_params();
// FIXME this does not automatically transpose vectors if necessary
resize(other.rows(), other.cols());
other.evalTo(this->derived());
}
public:
/** \brief Copies the generic expression \a other into *this.
|
Product.h:
38
39
40
41
42
43
44
45
46
47
48
49 | ColsAtCompileTime = RhsTraits::ColsAtCompileTime,
MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime,
MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,
// FIXME: only needed by GeneralMatrixMatrixTriangular
InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
// The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit
: (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
: ( ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
|| ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit
|
ProductEvaluators.h:
35
36
37
38
39
40
41
42
43
44
45
46 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
};
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
// TODO we should apply that rule only if that's really helpful
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
const Product<Lhs, Rhs, DefaultProduct> > >
{
static const bool value = true;
};
|
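The rewrite is algebraically neutral: it only folds the scalar into one operand so the product kernel runs once with a scaled alpha. A quick self-contained check with the public API:

    #include <Eigen/Dense>
    #include <iostream>
    int main()
    {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3), B = Eigen::MatrixXd::Random(3, 3);
      double s = 2.0;
      // s*(A*B) and (s*A)*B agree up to rounding; the rule picks the second form.
      std::cout << (s * (A * B) - (s * A) * B).norm() << "\n";
    }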
ProductEvaluators.h:
108
109
110
111
112
113
114
115
116
117
118
119 | : m_result(xpr.rows(), xpr.cols())
{
::new (static_cast<Base*>(this)) Base(m_result);
// FIXME shall we handle nested_eval here?
// if so, then we must take care to remove the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
//
// const LhsNested lhs(xpr.lhs());
|
ProductEvaluators.h:
128
129
130
131
132
133
134
135
136
137
138
139 | PlainObject m_result;
};
// The following three shortcuts are enabled only if the scalar types match exactly.
// TODO: we could enable them for different scalar types when the product is not vectorized.
// Dense = Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
ProductEvaluators.h:
143
144
145
146
147
148
149
150
151
152
153
154 | Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
}
};
// Dense += Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
|
ProductEvaluators.h:
158
159
160
161
162
163
164
165
166
167
168
169 | static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
}
};
// Dense -= Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
|
ProductEvaluators.h:
173
174
175
176
177
178
179
180
181
182
183
184 | static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
}
};
// Dense ?= scalar * Product
// TODO we should apply that rule if that's really helpful
|
ProductEvaluators.h:
180
181
182
183
184
185
186
187
188
189
190
191 | };
// Dense ?= scalar * Product
// TODO we should apply that rule if that's really helpful
// for instance, this is not good for inner products
template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>
{
typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
|
ProductEvaluators.h:
198
199
200
201
202
203
204
205
206
207
208 | };
//----------------------------------------
// Catch "Dense ?= xpr + Product<>" expression to save one temporary
// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
template<typename OtherXpr, typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
static const bool value = true;
};
|
ProductEvaluators.h:
272
273
274
275
276
277
278
279
280
281
282
283 | void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
{
evaluator<Rhs> rhsEval(rhs);
typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
// FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
// FIXME not very good if rhs is real and lhs complex while alpha is real too
const Index cols = dst.cols();
for (Index j=0; j<cols; ++j)
func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
}
// Row major result
|
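The loop is a rank-1 update applied column by column; a minimal public-API model of this column-major path with func = add (hypothetical free function):

    #include <Eigen/Dense>
    // dst.col(j) += rhs(j) * lhs for each j, i.e. dst += lhs * rhs (outer product).
    void outerProductAdd(Eigen::MatrixXd& dst,
                         const Eigen::VectorXd& lhs,
                         const Eigen::RowVectorXd& rhs)
    {
      for (Eigen::Index j = 0; j < dst.cols(); ++j)
        dst.col(j) += rhs(j) * lhs;
    }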
ProductEvaluators.h:
285
286
287
288
289
290
291
292
293
294
295
296 | void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
{
evaluator<Lhs> lhsEval(lhs);
typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
// FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
// FIXME not very good if lhs is real and rhs complex while alpha is real too
const Index rows = dst.rows();
for (Index i=0; i<rows; ++i)
func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
}
template<typename Lhs, typename Rhs>
|
ProductEvaluators.h:
298
299
300
301
302
303
304
305
306
307
308
309 | {
template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
// TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
struct adds {
Scalar m_scale;
explicit adds(const Scalar& s) : m_scale(s) {}
template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
|
ProductEvaluators.h:
523
524
525
526
527
528
529
530
531
532
533
534 | : (bool(RhsRowMajor) && !CanVectorizeLhs),
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
| (EvalToRowMajor ? RowMajorBit : 0)
// TODO enable vectorization for mixed types
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
| (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
|
ProductEvaluators.h:
553
554
555
556
557
558
559
560
561
562
563
564 | }
/* Allow index-based non-packet access. It is impossible though to allow index-based packet access,
* which is why we don't set the LinearAccessBit.
* TODO: this seems possible when the result is a vector
*/
EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
{
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
}
|
ProductEvaluators.h:
588
589
590
591
592
593
594
595
596
597
598 | LhsEtorType m_lhsImpl;
RhsEtorType m_rhsImpl;
// TODO: Get rid of m_innerDim if known at compile time
Index m_innerDim;
};
template<typename Lhs, typename Rhs>
struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
: product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
{
|
ProductEvaluators.h:
779
780
781
782
783
784
785
786
787
788
789
790 | _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
_ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
_SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
// FIXME currently we need same types, but in the future the next rule should be the one
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
Alignment = evaluator<MatrixType>::Alignment,
AsScalarProduct = (DiagonalType::SizeAtCompileTime==1)
|
ProductEvaluators.h:
861
862
863
864
865
866
867
868
869
870
871
872 | #ifndef __CUDACC__
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
{
// FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
// See also similar calls below.
return this->template packet_impl<LoadMode,PacketType>(row,col, row,
typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
}
template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index idx) const
|
ProductEvaluators.h:
940
941
942
943
944
945
946
947
948
949
950
951 | static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
{
MatrixType mat(xpr);
const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
// FIXME we need an is_same for expression that is not sensitive to constness. For instance
// is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
//if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
if(is_same_dense(dst, mat))
{
// apply the permutation inplace
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
mask.fill(false);
|
ProductEvaluators.h:
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042 | /***************************************************************************
* Products with transposition matrices
***************************************************************************/
// FIXME could we unify Transpositions and Permutation into a single "shape"??
/** \internal
* \class transposition_matrix_product
* Internal helper class implementing the product between a transposition matrix and a matrix.
*/
template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
struct transposition_matrix_product
|
Redux.h:
14
15
16
17
18
19
20
21
22
23
24 | namespace Eigen {
namespace internal {
// TODO
// * implement other kind of vectorization
// * factorize code
/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/
|
Redux.h:
344
345
346
347
348
349
350
351
352
353
354 | enum {
MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
// TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
IsRowMajor = XprType::IsRowMajor,
SizeAtCompileTime = XprType::SizeAtCompileTime,
InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
CoeffReadCost = evaluator<XprType>::CoeffReadCost,
Alignment = evaluator<XprType>::Alignment
};
|
Replicate.h:
28
29
30
31
32
33
34
35
36
37
38
39 | : RowFactor * MatrixType::RowsAtCompileTime,
ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic
? Dynamic
: ColFactor * MatrixType::ColsAtCompileTime,
//FIXME we don't propagate the max sizes !!!
MaxRowsAtCompileTime = RowsAtCompileTime,
MaxColsAtCompileTime = ColsAtCompileTime,
IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
: MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
: (MatrixType::Flags & RowMajorBit) ? 1 : 0,
// FIXME enable DirectAccess with negative strides?
|
Replicate.h:
35
36
37
38
39
40
41
42
43
44
45
46 | IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
: MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
: (MatrixType::Flags & RowMajorBit) ? 1 : 0,
// FIXME enable DirectAccess with negative strides?
Flags = IsRowMajor ? RowMajorBit : 0
};
};
}
/**
* \class Replicate
|
ReturnByValue.h:
31
32
33
34
35
36
37
38
39
40
41
42 | /* The ReturnByValue object doesn't even have a coeff() method.
* So the only way that nesting it in an expression can work is by evaluating it into a plain matrix.
* So internal::nested always gives the plain return matrix type.
*
* FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??
* Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators
*/
template<typename Derived,int n,typename PlainObject>
struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
{
typedef typename traits<Derived>::ReturnType type;
};
|
ReturnByValue.h:
88
89
90
91
92
93
94
95
96
97
98
99 | namespace internal {
// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that
// when a ReturnByValue expression is assigned, the evaluator is not constructed.
// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world
template<typename Derived>
struct evaluator<ReturnByValue<Derived> >
: public evaluator<typename internal::traits<Derived>::ReturnType>
{
typedef ReturnByValue<Derived> XprType;
typedef typename internal::traits<Derived>::ReturnType PlainObject;
|
SelfAdjointView.h:
264
265
266
267
268
269
270
271
272
273
274
275 | // selfadjoint to dense matrix
namespace internal {
// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
// in the future selfadjoint-ness should be defined by the expression traits
// such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
template<typename MatrixType, unsigned int Mode>
struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
{
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef SelfAdjointShape Shape;
|
SelfCwiseBinaryOp.h:
11
12
13
14
15
16
17
18
19
20
21
22 | #define EIGEN_SELFCWISEBINARYOP_H
namespace Eigen {
// TODO generalize the scalar type of 'other'
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
return derived();
}
|
SolveTriangular.h:
57
58
59
60
61
62
63
64
65
66
67
68 | static void run(const Lhs& lhs, Rhs& rhs)
{
ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
// FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1
bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
(useRhsDirectly ? rhs.data() : 0));
if(!useRhsDirectly)
|
StableNorm.h:
43
44
45
46
47
48
49
50
51
52
53
54 | {
scale = maxCoeff;
}
// TODO if the maxCoeff is much much smaller than the current scale,
// then we can neglect this sub vector
if(scale>Scalar(0)) // if scale==0, then bl is 0
ssq += (bl*invScale).squaredNorm();
}
template<typename Derived>
inline typename NumTraits<typename traits<Derived>::Scalar>::Real
|
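The snippet belongs to a blocked variant of the classic scaled sum-of-squares; the textbook scalar form of the same idea (a sketch, not Eigen's blocked code):

    #include <cmath>
    // Maintain (scale, ssq) with ||x||_2 = scale * sqrt(ssq), avoiding overflow/underflow.
    double stableNorm(const double* x, int n)
    {
      double scale = 0.0, ssq = 1.0;
      for (int i = 0; i < n; ++i)
      {
        double ax = std::abs(x[i]);
        if (ax == 0.0) continue;
        if (ax > scale)
        {
          double r = scale / ax;
          ssq = 1.0 + ssq * r * r;  // rescale the accumulated sum to the new scale
          scale = ax;
        }
        else
        {
          double r = ax / scale;
          ssq += r * r;
        }
      }
      return scale * std::sqrt(ssq);
    }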
Swap.h:
49
50
51
52
53
54
55
56
57
58
59
60 | const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode,PacketType>(index));
m_dst.template writePacket<StoreMode>(index,tmp);
}
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel; by simple I mean no CRTP (Gael)
template<int StoreMode, int LoadMode, typename PacketType>
void assignPacketByOuterInner(Index outer, Index inner)
{
Index row = Base::rowIndexByOuterInner(outer, inner);
Index col = Base::colIndexByOuterInner(outer, inner);
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
}
|
Transpose.h:
133
134
135
136
137
138
139
140
141
142
143 | EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); }
// FIXME: shall we keep the const version of coeffRef?
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const
{
return derived().nestedExpression().coeffRef(colId, rowId);
}
EIGEN_DEVICE_FUNC
|
Transpose.h:
231
232
233
234
235
236
237
238
239
240
241
242 | m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
}
};
// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only.
template<typename MatrixType>
struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize
static void run(MatrixType& m) {
typedef typename MatrixType::Scalar Scalar;
typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
const Index PacketSize = internal::packet_traits<Scalar>::size;
const Index Alignment = internal::evaluator<MatrixType>::Alignment;
|
Transpositions.h:
82
83
84
85
86
87
88
89
90
91
92
93 | for(StorageIndex i = 0; i < indices().size(); ++i)
coeffRef(i) = i;
}
// FIXME: do we want such methods ?
// might be useful when the target matrix expression is complex, e.g.:
// object.matrix().block(..,..,..,..) = trans * object.matrix().block(..,..,..,..);
/*
template<typename MatrixType>
void applyForwardToRows(MatrixType& mat) const
{
for(Index k=0 ; k<size() ; ++k)
|
TriangularMatrix.h:
550
551
552
553
554
555
556
557
558
559
560
561 | * Implementation of triangular evaluation/assignment
***************************************************************************/
#ifndef EIGEN_PARSED_BY_DOXYGEN
// FIXME should we keep that possibility
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
inline TriangularView<MatrixType, Mode>&
TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other)
{
internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
|
TriangularMatrix.h:
560
561
562
563
564
565
566
567
568
569
570 | internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
}
// FIXME should we keep that possibility
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other)
{
internal::call_assignment_no_alias(derived(), other.template triangularView<Mode>());
}
|
TriangularMatrix.h:
698
699
700
701
702
703
704
705
706
707
708 | namespace internal {
// TODO currently a triangular expression has the form TriangularView<.,.>
// in the future triangular-ness should be defined by the expression traits
// such that Transpose<TriangularView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
template<typename MatrixType, unsigned int Mode>
struct evaluator_traits<TriangularView<MatrixType,Mode> >
{
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;
|
TriangularMatrix.h:
851
852
853
854
855
856
857
858
859
860
861 | template<typename Kernel, unsigned int Mode, int UnrollCount, bool SetOpposite>
struct triangular_assignment_loop
{
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
enum {
col = (UnrollCount-1) / DstXprType::RowsAtCompileTime,
row = (UnrollCount-1) % DstXprType::RowsAtCompileTime
};
|
TriangularMatrix.h:
886
887
888
889
890
891
892
893
894
895
896
897 | };
// TODO: experiment with a recursive assignment procedure splitting the current
// triangular part into one rectangular and two triangular parts.
template<typename Kernel, unsigned int Mode, bool SetOpposite>
struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
{
typedef typename Kernel::Scalar Scalar;
|
MatrixBaseEigenvalues.h:
122
123
124
125
126
127
128
129
130
131
132
133 | MatrixBase<Derived>::operatorNorm() const
{
using std::sqrt;
typename Derived::PlainObject m_eval(derived());
// FIXME if it is really guaranteed that the eigenvalues are already sorted,
// then we don't need to compute a maxCoeff() here, comparing the 1st and last ones is enough.
return sqrt((m_eval*m_eval.adjoint())
.eval()
.template selfadjointView<Lower>()
.eigenvalues()
.maxCoeff()
);
|
RealSchur.h:
352
353
354
355
356
357
358
359
360
361
362
363 | template<typename MatrixType>
inline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT()
{
const Index size = m_matT.cols();
// FIXME to be efficient the following would require a triangular reduction code
// Scalar norm = m_matT.upper().cwiseAbs().sum()
// + m_matT.bottomLeftCorner(size-1,size-1).diagonal().cwiseAbs().sum();
Scalar norm(0);
for (Index j = 0; j < size; ++j)
norm += m_matT.col(j).segment(0, (std::min)(size,j+2)).cwiseAbs().sum();
return norm;
}
|
SelfAdjointEigenSolver.h:
449
450
451
452
453
454
455
456
457
458
459
460 | template<typename MatrixType>
SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
::computeFromTridiagonal(const RealVectorType& diag, const SubDiagonalType& subdiag , int options)
{
//TODO : Add an option to scale the values beforehand
bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;
m_eivalues = diag;
m_subdiag = subdiag;
if (computeEigenvectors)
{
m_eivec.setIdentity(diag.size(), diag.size());
|
SelfAdjointEigenSolver.h:
524
525
526
527
528
529
530
531
532
533
534
535 | else
info = NoConvergence;
// Sort eigenvalues and corresponding vectors.
// TODO make the sort optional ?
// TODO use a better sort algorithm !!
if (info == Success)
{
for (Index i = 0; i < n-1; ++i)
{
Index k;
diag.segment(i,n-i).minCoeff(&k);
|
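The selection-style loop in the snippet is O(n^2); one shape a "better sort" could take is an index permutation sorted with std::sort and then applied to both outputs (hypothetical refactor, public API only):

    #include <Eigen/Dense>
    #include <algorithm>
    #include <numeric>
    #include <vector>
    // Sort eigenvalues ascending and permute the eigenvector columns to match.
    void sortEigenPairs(Eigen::VectorXd& eivals, Eigen::MatrixXd& eivec)
    {
      const Eigen::Index n = eivals.size();
      std::vector<Eigen::Index> idx(n);
      std::iota(idx.begin(), idx.end(), 0);
      std::sort(idx.begin(), idx.end(),
                [&](Eigen::Index a, Eigen::Index b) { return eivals[a] < eivals[b]; });
      Eigen::VectorXd sortedVals(n);
      Eigen::MatrixXd sortedVecs(eivec.rows(), n);
      for (Eigen::Index i = 0; i < n; ++i)
      {
        sortedVals[i] = eivals[idx[i]];
        sortedVecs.col(i) = eivec.col(idx[i]);
      }
      eivals = sortedVals;
      eivec = sortedVecs;
    }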
SelfAdjointEigenSolver.h:
635
636
637
638
639
640
641
642
643
644
645
646 | VectorType& eivals = solver.m_eivalues;
// Shift the matrix to the mean eigenvalue and map the matrix coefficients to [-1:1] to avoid over- and underflow.
Scalar shift = mat.trace() / Scalar(3);
// TODO Avoid this copy. Currently it is necessary to suppress bogus values when determining maxCoeff and for computing the eigenvectors later
MatrixType scaledMat = mat.template selfadjointView<Lower>();
scaledMat.diagonal().array() -= shift;
Scalar scale = scaledMat.cwiseAbs().maxCoeff();
if(scale > 0) scaledMat /= scale; // TODO for scale==0 we could save the remaining operations
// compute the eigenvalues
computeRoots(scaledMat,eivals);
|
SelfAdjointEigenSolver.h:
855
856
857
858
859
860
861
862
863
864
865 | // apply the givens rotation to the unit matrix Q = Q * G
if (matrixQ)
{
// FIXME if StorageOrder == RowMajor this operation is not very efficient
Map<Matrix<Scalar,Dynamic,Dynamic,StorageOrder> > q(matrixQ,n,n);
q.applyOnTheRight(k,k+1,rot);
}
}
}
} // end namespace internal
|
Homogeneous.h:
266
267
268
269
270
271
272
273
274
275
276
277 | EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
{
// FIXME investigate how to allow lazy evaluation of this product when possible
dst = Block<const LhsMatrixTypeNested,
LhsMatrixTypeNested::RowsAtCompileTime,
LhsMatrixTypeNested::ColsAtCompileTime==Dynamic?Dynamic:LhsMatrixTypeNested::ColsAtCompileTime-1>
(m_lhs,0,0,m_lhs.rows(),m_lhs.cols()-1) * m_rhs;
dst += m_lhs.col(m_lhs.cols()-1).rowwise()
.template replicate<MatrixType::ColsAtCompileTime>(m_rhs.cols());
}
|
Homogeneous.h:
304
305
306
307
308
309
310
311
312
313
314
315 | EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
{
// FIXME investigate how to allow lazy evaluation of this product when possible
dst = m_lhs * Block<const RhsNested,
RhsNested::RowsAtCompileTime==Dynamic?Dynamic:RhsNested::RowsAtCompileTime-1,
RhsNested::ColsAtCompileTime>
(m_rhs,0,0,m_rhs.rows()-1,m_rhs.cols());
dst += m_rhs.row(m_rhs.rows()-1).colwise()
.template replicate<MatrixType::RowsAtCompileTime>(m_lhs.rows());
}
|
Homogeneous.h:
430
431
432
433
434
435
436
437
438
439
440
441 | homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, Lhs>(lhs, rhs.nestedExpression()).evalTo(dst);
}
};
// TODO: the following specialization is to address a regression from 3.2 to 3.3
// In the future, this path should be optimized.
template<typename Lhs, typename RhsArg, int ProductTag>
struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, TriangularShape, HomogeneousShape, ProductTag>
{
template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
{
|
Hyperplane.h:
118
119
120
121
122
123
124
125
126
127
128
129 | /** Constructs a hyperplane passing through the parametrized line \a parametrized.
* If the dimension of the ambient space is greater than 2, then the hyperplane is not unique,
* so an arbitrary choice is made.
*/
// FIXME to be consistent with the rest this could be implemented as a static Through function ??
EIGEN_DEVICE_FUNC explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
{
normal() = parametrized.direction().unitOrthogonal();
offset() = -parametrized.origin().dot(normal());
}
EIGEN_DEVICE_FUNC ~Hyperplane() {}
|
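A usage sketch in 2D, where the constructed hyperplane is unique up to sign:

    #include <Eigen/Geometry>
    #include <iostream>
    int main()
    {
      // A line through (0, 1) with direction +x.
      Eigen::ParametrizedLine<double, 2> line(Eigen::Vector2d(0, 1), Eigen::Vector2d(1, 0));
      Eigen::Hyperplane<double, 2> h(line);
      // Every point of the line lies on the hyperplane: signed distance ~ 0.
      std::cout << h.signedDistance(line.pointAt(3.0)) << "\n";
    }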
Quaternion.h:
662
663
664
665
666
667
668
669
670
671
672
673 | */
template <class Derived>
EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
{
// FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ??
Scalar n2 = this->squaredNorm();
if (n2 > Scalar(0))
return Quaternion<Scalar>(conjugate().coeffs() / n2);
else
{
// return an invalid result to flag the error
return Quaternion<Scalar>(Coefficients::Zero());
|
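For unit quaternions the inverse and the conjugate coincide, which is what the naming question is about; a quick public-API check:

    #include <Eigen/Geometry>
    #include <iostream>
    int main()
    {
      Eigen::Quaterniond q(Eigen::AngleAxisd(0.7, Eigen::Vector3d::UnitZ()));
      // q has unit norm, so inverse() == conjugate() up to rounding.
      std::cout << (q.inverse().coeffs() - q.conjugate().coeffs()).norm() << "\n";
    }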
Scaling.h:
67
68
69
70
71
72
73
74
75
76
77
78 | return res;
}
/** Concatenates a uniform scaling and a linear transformation matrix */
// TODO returns an expression
template<typename Derived>
inline typename internal::plain_matrix_type<Derived>::type operator* (const MatrixBase<Derived>& other) const
{ return other * m_factor; }
template<typename Derived,int Dim>
inline Matrix<Scalar,Dim,Dim> operator*(const RotationBase<Derived,Dim>& r) const
{ return r.toRotationMatrix() * m_factor; }
|
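Usage of the eager product the TODO would like to turn into a lazy expression:

    #include <Eigen/Geometry>
    #include <iostream>
    int main()
    {
      Eigen::Matrix2d M;
      M << 1, 2,
           3, 4;
      // Today this materializes a plain matrix (other * m_factor) rather than an expression.
      Eigen::Matrix2d R = Eigen::Scaling(2.0) * M;
      std::cout << R << "\n";  // every coefficient doubled
    }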
BlockHouseholder.h:
62
63
64
65
66
67
68
69
70
71
72 | {
triFactor.row(i).tail(rt).noalias() = -hCoeffs(i) * vectors.col(i).tail(rs).adjoint()
* vectors.bottomRightCorner(rs, rt).template triangularView<UnitLower>();
// FIXME add .noalias() once the triangular product can work inplace
triFactor.row(i).tail(rt) = triFactor.row(i).tail(rt) * triFactor.bottomRightCorner(rt,rt).template triangularView<Upper>();
}
triFactor(i,i) = hCoeffs(i);
}
}
|
BlockHouseholder.h:
89
90
91
92
93
94
95
96
97
98
99 | // A -= V T V^* A
Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,
(VectorsType::MaxColsAtCompileTime==1 && MatrixType::MaxColsAtCompileTime!=1)?RowMajor:ColMajor,
VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat;
// FIXME add .noalias() once the triangular product can work inplace
if(forward) tmp = T.template triangularView<Upper>() * tmp;
else tmp = T.template triangularView<Upper>().adjoint() * tmp;
mat.noalias() -= V * tmp;
}
} // end namespace internal
|
IncompleteCholesky.h:
245
246
247
248
249
250
251
252
253
254
255
256 | m_scale(j) = RealScalar(1)/m_scale(j);
else
m_scale(j) = 1;
// TODO disable scaling if not needed, i.e., if it is roughly uniform? (this will make solve() faster)
// Scale and compute the shift for the matrix
RealScalar mindiag = NumTraits<RealScalar>::highest();
for (Index j = 0; j < n; j++)
{
for (Index k = colPtr[j]; k < colPtr[j+1]; k++)
vals[k] *= (m_scale(j)*m_scale(rowIdx[k]));
|
IncompleteLUT.h:
228
229
230
231
232
233
234
235
236
237
238
239 | #ifndef EIGEN_MPL2_ONLY
// To this end, let's symmetrize the pattern and perform AMD on it.
SparseMatrix<Scalar,ColMajor, StorageIndex> mat1 = amat;
SparseMatrix<Scalar,ColMajor, StorageIndex> mat2 = amat.transpose();
// FIXME for a matrix with nearly symmetric pattern, mat2+mat1 is the appropriate choice.
// on the other hand for a really non-symmetric pattern, mat2*mat1 should be preferred...
SparseMatrix<Scalar,ColMajor, StorageIndex> AtA = mat2 + mat1;
AMDOrdering<StorageIndex> ordering;
ordering(AtA,m_P);
m_Pinv = m_P.inverse(); // cache the inverse permutation
#else
// If AMD is not available, (MPL2-only), then let's use the slower COLAMD routine.
|
FullPivLU.h:
63
64
65
66
67
68
69
70
71
72
73
74 | typedef _MatrixType MatrixType;
typedef SolverBase<FullPivLU> Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU)
// FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int
enum {
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
};
typedef typename internal::plain_row_type<MatrixType, StorageIndex>::type IntRowVectorType;
typedef typename internal::plain_col_type<MatrixType, StorageIndex>::type IntColVectorType;
typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType;
|
FullPivLU.h:
236
237
238
239
240
241
242
243
244
245
246
247 | * Output: \verbinclude FullPivLU_solve.out
*
* \sa TriangularView::solve(), kernel(), inverse()
*/
// FIXME this is a copy-paste of the base-class member to add the isInitialized assertion.
template<typename Rhs>
inline const Solve<FullPivLU, Rhs>
solve(const MatrixBase<Rhs>& b) const
{
eigen_assert(m_isInitialized && "LU is not initialized.");
return Solve<FullPivLU, Rhs>(*this, b.derived());
}
|
FullPivLU.h:
596
597
598
599
600
601
602
603
604
605
606
607 | eigen_assert(m_isInitialized && "LU is not initialized.");
const Index smalldim = (std::min)(m_lu.rows(), m_lu.cols());
// LU
MatrixType res(m_lu.rows(),m_lu.cols());
// FIXME the .toDenseMatrix() should not be needed...
res = m_lu.leftCols(smalldim)
.template triangularView<UnitLower>().toDenseMatrix()
* m_lu.topRows(smalldim)
.template triangularView<Upper>().toDenseMatrix();
// P^{-1}(LU)
res = m_p.inverse() * res;
|
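reconstructedMatrix() rebuilds P^{-1} L U Q^{-1} and should reproduce the decomposed matrix; a quick check:

    #include <Eigen/LU>
    #include <iostream>
    int main()
    {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
      Eigen::FullPivLU<Eigen::MatrixXd> lu(A);
      // Prints a value at round-off level.
      std::cout << (lu.reconstructedMatrix() - A).norm() << "\n";
    }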
FullPivLU.h:
665
666
667
668
669
670
671
672
673
674
// we construct a temporary trapezoid matrix m, by taking the U matrix and
// permuting the rows and cols to bring the nonnegligible pivots to the top of
// the main diagonal. We need that to be able to apply our triangular solvers.
// FIXME when we get triangularView-for-rectangular-matrices, this can be simplified
Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, MatrixType::Options,
MaxSmallDimAtCompileTime, MatrixType::MaxColsAtCompileTime>
m(dec().matrixLU().block(0, 0, rank(), cols));
for(Index i = 0; i < rank(); ++i)
{
if(i) m.row(i).head(i).setZero();
m.row(i).tail(cols-i) = dec().matrixLU().row(pivots.coeff(i)).tail(cols-i);
|
PartialPivLU.h:
79
80
81
82
83
84
85
86
87
88
89 | typedef _MatrixType MatrixType;
typedef SolverBase<PartialPivLU> Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU)
// FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int
enum {
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
};
typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
typedef typename MatrixType::PlainObject PlainObject;
|
PartialPivLU.h:
168
169
170
171
172
173
174
175
176
177
178
179 | * theoretically exists and is unique regardless of b.
*
* \sa TriangularView::solve(), inverse(), computeInverse()
*/
// FIXME this is a copy-paste of the base-class member to add the isInitialized assertion.
template<typename Rhs>
inline const Solve<PartialPivLU, Rhs>
solve(const MatrixBase<Rhs>& b) const
{
eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
return Solve<PartialPivLU, Rhs>(*this, b.derived());
}
|
PartialPivLU.h:
341
342
343
344
345
346
347
348
349
350
351
352 | /** \internal This is the blocked version of fullpivlu_unblocked() */
template<typename Scalar, int StorageOrder, typename PivIndex>
struct partial_lu_impl
{
// FIXME add a stride to Map, so that the following mapping becomes easier,
// another option would be to create an expression being able to automatically
// wrap any Map, Matrix, and Block expressions as a unique type, but since that's exactly
// a Map + stride, why not add a stride to Map, and convenient ctors from a Matrix,
// and Block.
typedef Map<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > MapLU;
typedef Block<MapLU, Dynamic, Dynamic> MatrixType;
typedef Block<MatrixType,Dynamic,Dynamic> BlockType;
|
PartialPivLU.h:
390
391
392
393
394
395
396
397
398
399
400
401 | lu.row(k).swap(lu.row(row_of_biggest_in_col));
++nb_transpositions;
}
// FIXME shall we introduce a safe quotient expression in case 1/lu.coeff(k,k)
// overflows but not the actual quotient?
lu.col(k).tail(rrows) /= lu.coeff(k,k);
}
else if(first_zero_pivot==-1)
{
// the pivot is exactly zero, we record the index of the first pivot which is exactly 0,
// and continue the factorization such that we still have A = PLU
|
MetisSupport.h:
112
113
114
115
116
117
118
119
120
121
122
123 | output_error = METIS_NodeND(&m, m_indexPtr.data(), m_innerIndices.data(), NULL, NULL, perm.data(), iperm.data());
if(output_error != METIS_OK)
{
//FIXME The ordering interface should define a class of possible errors
std::cerr << "ERROR WHILE CALLING THE METIS PACKAGE \n";
return;
}
// Get the fill-reducing permutation
//NOTE: If Ap is the permuted matrix then perm and iperm vectors are defined as follows
// Row (column) i of Ap is the perm(i) row(column) of A, and row (column) i of A is the iperm(i) row(column) of Ap
|
Ordering.h:
20
21
22
23
24
25
26
27
28
29
30
31 | /** \internal
* \ingroup OrderingMethods_Module
* \param[in] A the input non-symmetric matrix
* \param[out] symmat the symmetric pattern A^T+A from the input matrix \a A.
* FIXME: The values should not be considered here
*/
template<typename MatrixType>
void ordering_helper_at_plus_a(const MatrixType& A, MatrixType& symmat)
{
MatrixType C;
C = A.transpose(); // NOTE: Could be costly
for (int i = 0; i < C.rows(); i++)
|
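In public-API terms the helper computes the symmetrized pattern A^T + A (values carried along, which is exactly what the FIXME says should not matter for ordering):

    #include <Eigen/Sparse>
    Eigen::SparseMatrix<double> symmetrizePattern(const Eigen::SparseMatrix<double>& A)
    {
      Eigen::SparseMatrix<double> At = A.transpose();  // NOTE: could be costly, as above
      return Eigen::SparseMatrix<double>(A + At);
    }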
ColPivHouseholderQR.h:
57
58
59
60
61
62
63
64
65
66
67
68 | MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
};
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
// FIXME should be int
typedef typename MatrixType::StorageIndex StorageIndex;
typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationType;
typedef typename internal::plain_row_type<MatrixType, Index>::type IntRowVectorType;
typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType;
typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType;
|
FullPivHouseholderQR.h:
66
67
68
69
70
71
72
73
74
75
76
77 | MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
};
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
// FIXME should be int
typedef typename MatrixType::StorageIndex StorageIndex;
typedef internal::FullPivHouseholderQRMatrixQReturnType<MatrixType> MatrixQReturnType;
typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
typedef Matrix<StorageIndex, 1,
EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime,RowsAtCompileTime), RowMajor, 1,
EIGEN_SIZE_MIN_PREFER_FIXED(MaxColsAtCompileTime,MaxRowsAtCompileTime)> IntDiagSizeVectorType;
typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationType;
|
FullPivHouseholderQR.h:
543
544
545
546
547
548
549
550
551
552
553 | {
eigen_assert(rhs.rows() == rows());
const Index l_rank = rank();
// FIXME introduce nonzeroPivots() and use it here. and more generally,
// make the same improvements in this dec as in FullPivLU.
if(l_rank==0)
{
dst.setZero();
return;
}
|
HouseholderQR.h:
53
54
55
56
57
58
59
60
61
62
63
64 | MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
};
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
// FIXME should be int
typedef typename MatrixType::StorageIndex StorageIndex;
typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime, (MatrixType::Flags&RowMajorBit) ? RowMajor : ColMajor, MaxRowsAtCompileTime, MaxRowsAtCompileTime> MatrixQType;
typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType;
/**
|
BDCSVD.h:
247
248
249
250
251
252
253
254
255
256
//**** step -1 - If the problem is too small, directly fall back to JacobiSVD and return
if(matrix.cols() < m_algoswap)
{
// FIXME this line involves temporaries
JacobiSVD<MatrixType> jsvd(matrix,computationOptions);
if(computeU()) m_matrixU = jsvd.matrixU();
if(computeV()) m_matrixV = jsvd.matrixV();
m_singularValues = jsvd.singularValues();
m_nonzeroSingularValues = jsvd.nonzeroSingularValues();
m_isInitialized = true;
return *this;
|
BDCSVD.h:
265
266
267
268
269
270
271
272
273
274
275
276 | if (m_isTranspose) copy = matrix.adjoint()/scale;
else copy = matrix/scale;
//**** step 1 - Bidiagonalization
// FIXME this line involves temporaries
internal::UpperBidiagonalization<MatrixX> bid(copy);
//**** step 2 - Divide & Conquer
m_naiveU.setZero();
m_naiveV.setZero();
// FIXME this line involves a temporary matrix
m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
|
BDCSVD.h:
271
272
273
274
275
276
277
278
279
280
281 | //**** step 2 - Divide & Conquer
m_naiveU.setZero();
m_naiveV.setZero();
// FIXME this line involves a temporary matrix
m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
m_computed.template bottomRows<1>().setZero();
divide(0, m_diagSize - 1, 0, 0, 0);
//**** step 3 - Copy singular values and vectors
for (int i=0; i<m_diagSize; i++)
{
|
BDCSVD.h:
405
406
407
408
409
410
411
412
413
414
415
416 | // We use the other algorithm which is more efficient for small
// matrices.
if (n < m_algoswap)
{
// FIXME this line involves temporaries
JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0));
if (m_compU)
m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();
else
{
m_naiveU.row(0).segment(firstCol, n + 1).real() = b.matrixU().row(0);
m_naiveU.row(1).segment(firstCol, n + 1).real() = b.matrixU().row(n);
|
BDCSVD.h:
563
564
565
566
567
568
569
570
571
572
573
574 | // the first column and on the diagonal and has undergone deflation, so diagonal is in increasing
// order except for possibly the (0,0) entry. The computed SVD is stored U, singVals and V, except
// that if m_compV is false, then V is not computed. Singular values are sorted in decreasing order.
//
// TODO Opportunities for optimization: better root finding algo, better stopping criterion, better
// handling of round-off errors, be consistent in ordering
// For instance, to solve the secular equation using FMM, see http://www.stat.uchicago.edu/~lekheng/courses/302/classics/greengard-rokhlin.pdf
template <typename MatrixType>
void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
{
const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
using std::abs;
|
BDCSVD.h:
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023 | // page 13
// i,j >= 1, i!=j and |di - dj| < epsilon * norm2(M)
// We apply two rotations to have zj = 0;
// TODO deflation44 is still broken and not properly tested
template <typename MatrixType>
void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size)
{
using std::abs;
using std::sqrt;
using std::conj;
using std::pow;
|
AmbiVector.h:
189
190
191
192
193
194
195
196
197
198
199
200 | return m_buffer[i];
else
{
ListEl* EIGEN_RESTRICT llElements = reinterpret_cast<ListEl*>(m_buffer);
// TODO factorize the following code to reduce code generation
eigen_assert(m_mode==IsSparse);
if (m_llSize==0)
{
// this is the first element
m_llStart = 0;
m_llCurrent = 0;
++m_llSize;
|
ConservativeSparseSparseProduct.h:
85
86
87
88
89
90
91
92
93
94
95
96 | // alternative ordered insertion code:
const Index t200 = rows/11; // 11 == (log2(200)*1.39)
const Index t = (rows*100)/139;
// FIXME reserve nnz non zeros
// FIXME implement faster sorting algorithms for very small nnz
// if the result is sparse enough => use a quick sort
// otherwise => loop through the entire vector
// In order to avoid performing an expensive log2 when the
// result is clearly very sparse we use a linear bound up to 200.
if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)
{
|
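As I read it, the branch encodes a cost model: sorted insertion costs about nnz*log2(nnz) while a full pass over the dense accumulator costs about rows, with the 1.39 factor folded into both thresholds. Restated as a standalone predicate (a hedged reading, not a verified derivation):

    #include <cmath>
    bool useSortedInsertion(long nnz, long rows)
    {
      if (nnz <= 1) return true;            // trivially cheap to sort
      const long t200 = rows / 11;          // 11 ~= log2(200) * 1.39, linear bound up to nnz = 200
      const long t = (rows * 100) / 139;    // rows / 1.39
      return (nnz < 200 && nnz < t200) || nnz * long(std::log2(double(nnz))) < t;
    }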
ConservativeSparseSparseProduct.h:
143
144
145
146
147
148
149
150
151
152
153
154 | typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime,ColMajorMatrixAux::Flags>::type ColMajorMatrix;
// If the result is tall and thin (in the extreme case a column vector)
// then it is faster to sort the coefficients inplace instead of transposing twice.
// FIXME, the following heuristic is probably not very good.
if(lhs.rows()>rhs.cols())
{
ColMajorMatrix resCol(lhs.rows(),rhs.cols());
// perform sorted insertion
internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, true);
res = resCol.markAsRValue();
}
|
SparseAssign.h:
23
24
25
26
27
28
29
30
31
32
33
34 | template<typename Derived>
template<typename OtherDerived>
Derived& SparseMatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
{
// TODO use the evaluator mechanism
other.evalTo(derived());
return derived();
}
template<typename Derived>
template<typename OtherDerived>
inline Derived& SparseMatrixBase<Derived>::operator=(const SparseMatrixBase<OtherDerived>& other)
|
SparseCwiseBinaryOp.h:
28
29
30
31
32
33
34
35
36
37
38
39 | // generic sparse
// 4 - dense op dense product dense
// generic dense
//
// TODO to ease compiler job, we could specialize product/quotient with a scalar
// and fallback to cwise-unary evaluator using bind1st_op and bind2nd_op.
template<typename BinaryOp, typename Lhs, typename Rhs>
class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Sparse>
: public SparseMatrixBase<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
{
public:
|
SparseDenseProduct.h:
70
71
72
73
74
75
76
77
78
79
80
81 | }
};
// FIXME: what is the purpose of the following specialization? Is it for the BlockedSparse format?
// -> let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators
// template<typename T1, typename T2/*, int _Options, typename _StrideType*/>
// struct ScalarBinaryOpTraits<T1, Ref<T2/*, _Options, _StrideType*/> >
// {
// enum {
// Defined = 1
// };
|
SparseMatrix.h:
518
519
520
521
522
523
524
525
526
527
528
529 | */
template<typename KeepFunc>
void prune(const KeepFunc& keep = KeepFunc())
{
// TODO optimize the uncompressed mode to avoid moving and allocating the data twice
makeCompressed();
StorageIndex k = 0;
for(Index j=0; j<m_outerSize; ++j)
{
Index previousStart = m_outerIndex[j];
m_outerIndex[j] = k;
|
SparseMatrix.h:
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027 | template<typename DupFunctor>
void SparseMatrix<Scalar,_Options,_StorageIndex>::collapseDuplicates(DupFunctor dup_func)
{
eigen_assert(!isCompressed());
// TODO, in practice we should be able to use m_innerNonZeros for that task
IndexVector wi(innerSize());
wi.fill(-1);
StorageIndex count = 0;
// for each inner-vector, wi[inner_index] will hold the position of first element into the index/value buffers
for(Index j=0; j<outerSize(); ++j)
{
StorageIndex start = count;
|
SparseMatrix.h:
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093 | SparseMatrix dest(other.rows(),other.cols());
Eigen::Map<IndexVector> (dest.m_outerIndex,dest.outerSize()).setZero();
// pass 1
// FIXME the above copy could be merged with that pass
for (Index j=0; j<otherCopy.outerSize(); ++j)
for (typename OtherCopyEval::InnerIterator it(otherCopyEval, j); it; ++it)
++dest.m_outerIndex[it.index()];
// prefix sum
StorageIndex count = 0;
IndexVector positions(dest.outerSize());
|
SparseMatrix.h:
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314 | bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0))
&& (std::size_t(m_outerIndex[outer+1]) == m_data.size());
std::size_t startId = m_outerIndex[outer];
// FIXME let's make sure sizeof(long int) == sizeof(std::size_t)
std::size_t p = m_outerIndex[outer+1];
++m_outerIndex[outer+1];
double reallocRatio = 1;
if (m_data.allocatedSize()<=m_data.size())
{
// if there is no preallocated memory, let's reserve a minimum of 32 elements
|
SparseMatrixBase.h:
109
110
111
112
113
114
115
116
117
118
119
120 | >::type AdjointReturnType;
typedef Transpose<Derived> TransposeReturnType;
typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
// FIXME storage order do not match evaluator storage order
typedef SparseMatrix<Scalar, Flags&RowMajorBit ? RowMajor : ColMajor, StorageIndex> PlainObject;
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is the "real scalar" type; if the \a Scalar type is already real numbers
* (e.g. int, float or double) then \a RealScalar is just the same as \a Scalar. If
* \a Scalar is \a std::complex<T> then RealScalar is \a T.
*
|
SparseMatrixBase.h:
188
189
190
191
192
193
194
195
196
197
198 | bool isRValue() const { return m_isRValue; }
Derived& markAsRValue() { m_isRValue = true; return derived(); }
SparseMatrixBase() : m_isRValue(false) { /* TODO check flags */ }
template<typename OtherDerived>
Derived& operator=(const ReturnByValue<OtherDerived>& other);
template<typename OtherDerived>
inline Derived& operator=(const SparseMatrixBase<OtherDerived>& other);
|
SparsePermutation.h:
87
88
89
90
91
92
93
94
95
96
97 | template <int ProductTag> struct product_promote_storage_type<Sparse, PermutationStorage, ProductTag> { typedef Sparse ret; };
template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Sparse, ProductTag> { typedef Sparse ret; };
// TODO, the following two overloads are only needed to define the right temporary type through
// typename traits<permutation_sparse_matrix_product<Rhs,Lhs,OnTheRight,false> >::ReturnType
// whereas it should be correctly handled by traits<Product<> >::PlainObject
template<typename Lhs, typename Rhs, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, AliasFreeProduct>, ProductTag, PermutationShape, SparseShape>
: public evaluator<typename permutation_matrix_product<Rhs,OnTheLeft,false,SparseShape>::ReturnType>
{
|
SparseRef.h:
347
348
349
350
351
352
353
354
355
356
357
358 | };
namespace internal {
// FIXME shall we introduce a general evaluator_ref that we can specialize for any sparse object once, and thus remove this copy-pasta thing...
template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>
struct evaluator<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >
: evaluator<SparseCompressedBase<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > >
{
typedef evaluator<SparseCompressedBase<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > > Base;
typedef Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> XprType;
|
SparseSelfAdjointView.h:
122
123
124
125
126
127
128
129
130
131
132
133 | template<typename DerivedU>
SparseSelfAdjointView& rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
/** \returns an expression of P H P^-1 */
// TODO implement twists in a more evaluator friendly fashion
SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode> twistedBy(const PermutationMatrix<Dynamic,Dynamic,StorageIndex>& perm) const
{
return SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode>(m_matrix, perm);
}
template<typename SrcMatrixType,int SrcMode>
SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct<SrcMatrixType,SrcMode>& permutedMatrix)
|
SparseSelfAdjointView.h:
203
204
205
206
207
208
209
210
211
212
213
214 | }
namespace internal {
// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
// in the future selfadjoint-ness should be defined by the expression traits
// such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
template<typename MatrixType, unsigned int Mode>
struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> >
{
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef SparseSelfAdjointShape Shape;
|
SparseSelfAdjointView.h:
230
231
232
233
234
235
236
237
238
239
240
241 | {
internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), dst);
}
// FIXME: the handling of += and -= in sparse matrices should be cleaned up so that the next two overloads could be reduced to:
template<typename DestScalar,int StorageOrder,typename AssignFunc>
static void run(SparseMatrix<DestScalar,StorageOrder,StorageIndex> &dst, const SrcXprType &src, const AssignFunc& func)
{
SparseMatrix<DestScalar,StorageOrder,StorageIndex> tmp(src.rows(),src.cols());
run(tmp, src, AssignOpType());
call_assignment_no_alias_no_transpose(dst, tmp, func);
}
|
SparseSelfAdjointView.h:
260
261
262
263
264
265
266
267
268
269
270 | template<typename DestScalar>
static void run(DynamicSparseMatrix<DestScalar,ColMajor,StorageIndex>& dst, const SrcXprType &src, const AssignOpType&/*func*/)
{
// TODO directly evaluate into dst;
SparseMatrix<DestScalar,ColMajor,StorageIndex> tmp(dst.rows(),dst.cols());
internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), tmp);
dst = tmp;
}
};
} // end namespace internal
|
SparseSelfAdjointView.h:
373
374
375
376
377
378
379
380
381
382
383
384 | }
};
// NOTE: these two overloads are needed to evaluate the sparse selfadjoint view into a full sparse matrix
// TODO: maybe the copy could be handled by generic_product_impl so that these overloads would not be needed anymore
template<typename LhsView, typename Rhs, int ProductTag>
struct product_evaluator<Product<LhsView, Rhs, DefaultProduct>, ProductTag, SparseSelfAdjointShape, SparseShape>
: public evaluator<typename Product<typename Rhs::PlainObject, Rhs, DefaultProduct>::PlainObject>
{
typedef Product<LhsView, Rhs, DefaultProduct> XprType;
typedef typename XprType::PlainObject PlainObject;
|
SparseSelfAdjointView.h:
581
582
583
584
585
586
587
588
589
590
591 | }
}
// TODO implement twists in a more evaluator friendly fashion
namespace internal {
template<typename MatrixType, int Mode>
struct traits<SparseSymmetricPermutationProduct<MatrixType,Mode> > : traits<MatrixType> {
};
|
SparseSparseProductWithPruning.h:
54
55
56
57
58
59
60
61
62
63
64
65 | res.reserve(estimated_nnz_prod);
double ratioColRes = double(estimated_nnz_prod)/(double(lhs.rows())*double(rhs.cols()));
for (Index j=0; j<cols; ++j)
{
// FIXME:
//double ratioColRes = (double(rhs.innerVector(j).nonZeros()) + double(lhs.nonZeros())/double(lhs.cols()))/double(lhs.rows());
// let's do a more accurate determination of the nnz ratio for the current column j of res
tempVector.init(ratioColRes);
tempVector.setZero();
for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{
// FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
|
SparseSparseProductWithPruning.h:
61
62
63
64
65
66
67
68
69
70
71
72 | tempVector.init(ratioColRes);
tempVector.setZero();
for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{
// FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
tempVector.restart();
RhsScalar x = rhsIt.value();
for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt)
{
tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x;
}
}
|
SparseUtil.h:
92
93
94
95
96
97
98
99
100
101
102
103 | public:
typedef SparseVector<_Scalar, ColMajor, _StorageIndex> type;
};
// TODO this seems almost identical to plain_matrix_type<T, Sparse>
template<typename T,int Rows,int Cols,int Flags> struct sparse_eval {
typedef typename traits<T>::Scalar _Scalar;
typedef typename traits<T>::StorageIndex _StorageIndex;
enum { _Options = ((Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor };
public:
typedef SparseMatrix<_Scalar, _Options, _StorageIndex> type;
};
|
SparseVector.h:
184
185
186
187
188
189
190
191
192
193
194
195 | eigen_assert(i>=0 && i<m_size);
Index startId = 0;
Index p = Index(m_data.size()) - 1;
// TODO smart realloc
m_data.resize(p+2,1);
while ( (p >= startId) && (m_data.index(p) > i) )
{
m_data.index(p+1) = m_data.index(p);
m_data.value(p+1) = m_data.value(p);
--p;
|
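The "smart realloc" above has two parts: CompressedStorage::resize already takes a reserve factor (the second argument of the resize(p+2,1) call), which gives geometric growth; the remaining win is to avoid shifting elements after the reallocation. A hypothetical free-standing sketch over raw arrays, copying each element only once:

    #include <algorithm>

    // Insert (i, v) at position pos of a sorted index/value pair of arrays,
    // reallocating with headroom and moving every element exactly once.
    template <typename Scalar, typename Index>
    void insertWithSingleCopy(Scalar*& val, Index*& idx, Index size,
                              Index pos, Index i, const Scalar& v)
    {
      Scalar* nval = new Scalar[2 * (size + 1)];          // geometric growth
      Index*  nidx = new Index [2 * (size + 1)];
      std::copy(val, val + pos, nval);                    // prefix stays put
      std::copy(idx, idx + pos, nidx);
      nval[pos] = v; nidx[pos] = i;                       // the new element
      std::copy(val + pos, val + size, nval + pos + 1);   // suffix shifted once
      std::copy(idx + pos, idx + size, nidx + pos + 1);
      delete[] val; delete[] idx;
      val = nval; idx = nidx;
    }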
SparseView.h:
75
76
77
78
79
80
81
82
83
84
85
86 | };
namespace internal {
// TODO find a way to unify the two following variants
// This is tricky: implementing an inner iterator on top of an IndexBased evaluator is
// hard because the evaluators do not expose the sizes of the underlying expression.
template<typename ArgType>
struct unary_evaluator<SparseView<ArgType>, IteratorBased>
: public evaluator_base<SparseView<ArgType> >
{
|
TriangularSolver.h:
152
153
154
155
156
157
158
159
160
161
162
163 | if (tmp!=Scalar(0)) // optimization when other is actually sparse
{
if(!(Mode & UnitDiag))
{
// TODO replace this by a binary search. Make sure the binary search is safe for partially sorted elements.
LhsIterator it(lhsEval, i);
while(it && it.index()!=i)
++it;
eigen_assert(it && it.index()==i);
other.coeffRef(i,col) /= it.value();
}
LhsIterator it(lhsEval, i);
|
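When the lhs column is stored compressed (as in a plain SparseMatrix), the linear scan above can become the binary search the TODO asks for: inner indices of a compressed inner vector are sorted, and the "partially sorted" caveat only concerns uncompressed storage. A hedged sketch, assuming direct access to the compressed arrays rather than a generic evaluator:

    #include <algorithm>
    #include <Eigen/SparseCore>

    // Locate the diagonal entry of column i of a compressed column-major matrix.
    template <typename Scalar, typename StorageIndex>
    Scalar diagonalCoeff(const Eigen::SparseMatrix<Scalar, Eigen::ColMajor, StorageIndex>& lhs,
                         StorageIndex i)
    {
      const StorageIndex* idx   = lhs.innerIndexPtr();
      const StorageIndex* begin = idx + lhs.outerIndexPtr()[i];
      const StorageIndex* end   = idx + lhs.outerIndexPtr()[i + 1];
      const StorageIndex* p = std::lower_bound(begin, end, i); // indices are sorted
      eigen_assert(p != end && *p == i && "triangular solve: diagonal entry missing");
      return lhs.valuePtr()[p - idx];
    }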
TriangularSolver.h:
223
224
225
226
227
228
229
230
231
232
233
234 | res.reserve(other.nonZeros());
for(Index col=0 ; col<other.cols() ; ++col)
{
// FIXME estimate number of non zeros
tempVector.init(.99/*float(other.col(col).nonZeros())/float(other.rows())*/);
tempVector.setZero();
tempVector.restart();
for (typename Rhs::InnerIterator rhsIt(other, col); rhsIt; ++rhsIt)
{
tempVector.coeffRef(rhsIt.index()) = rhsIt.value();
}
|
TriangularSolver.h:
270
271
272
273
274
275
276
277
278
279
280
281 | }
Index count = 0;
// FIXME compute a reference value to filter zeros
for (typename AmbiVector<Scalar,StorageIndex>::Iterator it(tempVector/*,1e-12*/); it; ++it)
{
++ count;
// std::cerr << "fill " << it.index() << ", " << col << "\n";
// std::cout << it.value() << " ";
// FIXME use insertBack
res.insert(it.index(), col) = it.value();
|
TriangularSolver.h:
276
277
278
279
280
281
282
283
284
285
286
287 | {
++ count;
// std::cerr << "fill " << it.index() << ", " << col << "\n";
// std::cout << it.value() << " ";
// FIXME use insertBack
res.insert(it.index(), col) = it.value();
}
// std::cout << "tempVector.nonZeros() == " << int(count) << " / " << (other.rows()) << "\n";
}
res.finalize();
other = res.markAsRValue();
}
|
SparseLU.h:
410
411
412
413
414
415
416
417
418
419
420
421 | template <typename MatrixType, typename OrderingType>
void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
{
//TODO It is possible, as in SuperLU, to compute row and column scaling vectors to equilibrate the matrix mat.
// Firstly, copy the whole input matrix.
m_mat = mat;
// Compute fill-in ordering
OrderingType ord;
ord(m_mat,m_perm_c);
|
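The equilibration this TODO mentions already exists elsewhere in the tree: the unsupported IterScaling class (see the Scaling.h excerpt further down this list) computes exactly such row and column scaling vectors. A sketch of how it could be used ahead of the factorization, assuming a double, column-major matrix:

    #include <Eigen/SparseCore>
    #include <unsupported/Eigen/IterativeSolvers> // provides IterScaling

    // Equilibrate A in place and return the scaling diagonals.
    void equilibrate(Eigen::SparseMatrix<double>& A,
                     Eigen::VectorXd& Dr, Eigen::VectorXd& Dc)
    {
      Eigen::IterScaling<Eigen::SparseMatrix<double> > scal;
      scal.computeRef(A);            // A is replaced by Dr * A * Dc
      Dr = scal.LeftScaling();
      Dc = scal.RightScaling();
      // then factorize the scaled A, solve A*y = Dr.asDiagonal()*b, and
      // recover the unscaled solution as x = Dc.asDiagonal()*y.
    }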
SparseLU.h:
666
667
668
669
670
671
672
673
674
675
676
677 | return;
}
// Update the determinant of the row permutation matrix
// FIXME: the following test is not correct; we should probably take iperm_c into account, and pivrow is not directly the row pivot.
if (pivrow != jj) m_detPermR = -m_detPermR;
// Prune columns (0:jj-1) using column jj
Base::pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
// Reset repfnz for this column
for (i = 0; i < nseg; i++)
|
SparseLU_SupernodalMatrix.h:
23
24
25
26
27
28
29
30
31
32
33
34 | *
* NOTE : This class corresponds to the SCformat structure in SuperLU
*
*/
/* TODO
* InnerIterator as for SparseMatrix
* SuperInnerIterator to iterate through all supernodes
* Function for triangular solve
*/
template <typename _Scalar, typename _StorageIndex>
class MappedSuperNodalMatrix
{
|
SparseLU_SupernodalMatrix.h:
54
55
56
57
58
59
60
61
62
63
64
65 | }
/**
* Set appropriate pointers for the lower triangular supernodal matrix
* This information is available at the end of the numerical factorization
* FIXME This class will be modified such that it can be used in the course
* of the factorization.
*/
void setInfos(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
{
m_row = m;
m_col = n;
|
SparseQR.h:
481
482
483
484
485
486
487
488
489
490
491 | tdot *= m_hcoeffs(curIdx);
// Then update tval = tval - q * tau
// FIXME: tval -= tdot * m_Q.col(curIdx) should amount to the same (need to check/add support for efficient "dense ?= sparse")
for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)
tval(itq.row()) -= itq.value() * tdot;
// Detect fill-in for the current column of Q
if(m_etree(Ridx(i)) == nonzeroCol)
{
for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)
|
SparseQR.h:
506
507
508
509
510
511
512
513
514
515
516 | if(nonzeroCol < diagSize)
{
// Compute the Householder reflection that eliminates the current column
// FIXME this step should call the Householder module.
Scalar c0 = nzcolQ ? tval(Qidx(0)) : Scalar(0);
// First, the squared norm of Q((col+1):m, col)
RealScalar sqrNorm = 0.;
for (Index itq = 1; itq < nzcolQ; ++itq) sqrNorm += numext::abs2(tval(Qidx(itq)));
if(sqrNorm == RealScalar(0) && numext::imag(c0) == RealScalar(0))
{
|
SparseQR.h:
686
687
688
689
690
691
692
693
694
695
696
697 | }
const SparseQRType& m_qr;
};
// TODO this actually represents the adjoint of Q
template<typename SparseQRType>
struct SparseQRMatrixQTransposeReturnType
{
explicit SparseQRMatrixQTransposeReturnType(const SparseQRType& qr) : m_qr(qr) {}
template<typename Derived>
SparseQR_QProduct<SparseQRType,Derived> operator*(const MatrixBase<Derived>& other)
{
|
SuperLUSupport.h:
215
216
217
218
219
220
221
222
223
224
225 | res.storage.outerInd = mat.outerIndexPtr();
res.setScalarType<typename MatrixType::Scalar>();
// FIXME the following is not very accurate
if (MatrixType::Flags & Upper)
res.Mtype = SLU_TRU;
if (MatrixType::Flags & Lower)
res.Mtype = SLU_TRL;
eigen_assert(((MatrixType::Flags & SelfAdjoint)==0) && "SelfAdjoint matrix shape not supported by SuperLU");
|
SuperLUSupport.h:
274
275
276
277
278
279
280
281
282
283
284
285 | res.storage.outerInd = mat.outerIndexPtr();
res.setScalarType<typename MatrixType::Scalar>();
// FIXME the following is not very accurate
if (MatrixType::Flags & Upper)
res.Mtype = SLU_TRU;
if (MatrixType::Flags & Lower)
res.Mtype = SLU_TRL;
eigen_assert(((MatrixType::Flags & SelfAdjoint)==0) && "SelfAdjoint matrix shape not supported by SuperLU");
}
|
SuperLUSupport.h:
638
639
640
641
642
643
644
645
646
647
648
649 | StatFree(&m_sluStat);
m_extractedDataAreDirty = true;
// FIXME how to better check for errors ???
m_info = info == 0 ? Success : NumericalIssue;
m_factorizationIsOk = true;
}
template<typename MatrixType>
template<typename Rhs,typename Dest>
void SuperLU<MatrixType>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const
|
SuperLUSupport.h:
961
962
963
964
965
966
967
968
969
970
971
972 | &recip_pivot_growth, &rcond,
&m_sluStat, &info, Scalar());
StatFree(&m_sluStat);
// FIXME how to better check for errors ???
m_info = info == 0 ? Success : NumericalIssue;
m_factorizationIsOk = true;
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename MatrixType>
template<typename Rhs,typename Dest>
|
UmfPackSupport.h:
11
12
13
14
15
16
17
18
19
20
21 | #define EIGEN_UMFPACKSUPPORT_H
namespace Eigen {
/* TODO extract L, extract U, compute det, etc... */
// generic double/complex<double> wrapper functions:
inline void umfpack_defaults(double control[UMFPACK_CONTROL], double)
{ umfpack_di_defaults(control); }
|
ArrayCwiseBinaryOps.h:
103
104
105
106
107
108
109
110
111
112
113
114 | const CwiseBinaryOp<internal::scalar_pow_op<Scalar,T>,Derived,Constant<T> > pow(const T& exponent) const;
#endif
// TODO code generating macros could be moved to Macros.h and could include generation of documentation
#define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \
template<typename OtherDerived> \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived> \
OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
{ \
return CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived>(derived(), other.derived()); \
}\
|
mpreal.h:
740
741
742
743
744
745
746
747
748
749
750
751 | namespace internal{
// Use SFINAE to restrict arithmetic operations instantiation only for numeric types
// This is needed for smooth integration with libraries based on expression templates, like Eigen.
// TODO: Do the same for boolean operators.
template <typename ArgumentType> struct result_type {};
template <> struct result_type<mpreal> {typedef mpreal type;};
template <> struct result_type<mpz_t> {typedef mpreal type;};
template <> struct result_type<mpq_t> {typedef mpreal type;};
template <> struct result_type<long double> {typedef mpreal type;};
template <> struct result_type<double> {typedef mpreal type;};
|
mpreal.h:
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763 | #endif
inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const
{
// TODO: Add extended format specification (f, e, rounding mode) as is done in the output operator
(void)b;
(void)mode;
#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
std::ostringstream format;
|
mpreal.h:
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913 | }
inline std::istream& operator>>(std::istream &is, mpreal& v)
{
// TODO: use cout::hexfloat and other flags to set up the base
std::string tmp;
is >> tmp;
mpfr_set_str(v.mpfr_ptr(), tmp.c_str(), 10, mpreal::get_default_rnd());
return is;
}
//////////////////////////////////////////////////////////////////////////
|
AssignmentFunctors.h:
144
145
146
147
148
149
150
151
152
153
154
155 | EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
{
#ifdef __CUDACC__
// FIXME is there some kind of cuda::swap?
Scalar t=b; const_cast<Scalar&>(b)=a; a=t;
#else
using std::swap;
swap(a,const_cast<Scalar&>(b));
#endif
}
};
|
BinaryFunctors.h:
51
52
53
54
55
56
57
58
59
60
61
62 | struct functor_traits<scalar_sum_op<LhsScalar,RhsScalar> > {
enum {
Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2, // rough estimate!
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasAdd && packet_traits<RhsScalar>::HasAdd
// TODO vectorize mixed sum
};
};
/** \internal
* \brief Template specialization to deprecate the summation of boolean expressions.
* This is required to solve Bug 426.
* \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast()
|
BinaryFunctors.h:
95
96
97
98
99
100
101
102
103
104
105
106 | struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
enum {
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
// TODO vectorize mixed product
};
};
/** \internal
* \brief Template functor to compute the conjugate product of two scalars
*
* This is a shortcut for conj(x) * y, which is needed for optimization purposes; in Eigen2 support mode, this becomes x * conj(y)
|
UnaryFunctors.h:
753
754
755
756
757
758
759
760
761
762
763
764 | EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
{
return Scalar( (a>Scalar(0)) - (a<Scalar(0)) );
}
//TODO
//template <typename Packet>
//EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }
};
template<typename Scalar>
struct scalar_sign_op<Scalar,true> {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
|
UnaryFunctors.h:
769
770
771
772
773
774
775
776
777
778
779
780 | return Scalar(0);
aa = real_type(1)/aa;
return Scalar(real(a)*aa, imag(a)*aa );
}
//TODO
//template <typename Packet>
//EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }
};
template<typename Scalar>
struct functor_traits<scalar_sign_op<Scalar> >
{ enum {
Cost =
|
GeneralBlockPanelKernel.h:
188
189
190
191
192
193
194
195
196
197
198
199 | }
// ---- 2nd level of blocking on max(L2,L3), yields nc ----
// TODO find a reliable way to get the actual amount of cache per core to use for 2nd level blocking, that is:
// actual_l2 = max(l2, l3/nb_core_sharing_l3)
// The number below is quite conservative: it is better to underestimate the cache size rather than to overestimate it.
// For instance, it corresponds to 6MB of L3 shared among 4 cores.
#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
const Index actual_l2 = l3;
#else
const Index actual_l2 = 1572864; // == 1.5 MB
|
GeneralBlockPanelKernel.h:
232
233
234
235
236
237
238
239
240
241
242
243 | else if(old_k==k)
{
// So far, no blocking at all, i.e., kc==k, and nc==n.
// In this case, let's perform a blocking over the rows such that the packed lhs data is kept in cache L1/L2
// TODO: part of this blocking strategy is now implemented within the kernel itself, so the L1-based heuristic here should be obsolete.
Index problem_size = k*n*sizeof(LhsScalar);
Index actual_lm = actual_l2;
Index max_mc = m;
if(problem_size<=1024)
{
// problem is small enough to keep in L1
// Let's choose m such that lhs's block fits in 1/3 of L1
|
GeneralBlockPanelKernel.h:
310
311
312
313
314
315
316
317
318
319
320
321 | #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
#define CJMADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
#else
// FIXME (a bit overkill maybe ?)
template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
{
c = cj.pmadd(a,b,c);
}
};
|
GeneralBlockPanelKernel.h:
613
614
615
616
617
618
619
620
621
622
623 | ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
// FIXME: should depend on NumberOfRegisters
nr = 4,
mr = ResPacketSize,
LhsProgress = ResPacketSize,
RhsProgress = 1
};
|
GeneralBlockPanelKernel.h:
663
664
665
666
667
668
669
670
671
672
673
674 | }
EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
{
// FIXME not sure that's the best way to implement it!
loadRhs(b+0, b0);
loadRhs(b+1, b1);
loadRhs(b+2, b2);
loadRhs(b+3, b3);
}
// Vectorized path
|
GeneralBlockPanelKernel.h:
673
674
675
676
677
678
679
680
681
682
683 | // Vectorized path
EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, DoublePacketType& b0, DoublePacketType& b1)
{
// FIXME not sure that's the best way to implement it!
loadRhs(b+0, b0);
loadRhs(b+1, b1);
}
// Scalar path
EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsScalar& b0, RhsScalar& b1)
{
|
GeneralBlockPanelKernel.h:
681
682
683
684
685
686
687
688
689
690
691 | // Scalar path
EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsScalar& b0, RhsScalar& b1)
{
// FIXME not sure that's the best way to implement it!
loadRhs(b+0, b0);
loadRhs(b+1, b1);
}
// nothing special here
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
{
|
GeneralBlockPanelKernel.h:
761
762
763
764
765
766
767
768
769
770
771 | RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
// FIXME: should depend on NumberOfRegisters
nr = 4,
mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*ResPacketSize,
LhsProgress = ResPacketSize,
RhsProgress = 1
};
|
GeneralMatrixMatrixTriangular_BLAS.h:
66
67
68
69
70
71
72
73
74
75
76
77 | };
EIGEN_BLAS_RANKUPDATE_SPECIALIZE(double)
EIGEN_BLAS_RANKUPDATE_SPECIALIZE(float)
// TODO handle complex cases
// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(dcomplex)
// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(scomplex)
// SYRK for float/double
#define EIGEN_BLAS_RANKUPDATE_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
|
GeneralMatrixMatrixTriangular_BLAS.h:
132
133
134
135
136
137
138
139
140
141
142
143 | EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk_)
#endif
// TODO handle complex cases
// EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)
// EIGEN_BLAS_RANKUPDATE_C(scomplex, float, float, cherk_)
} // end namespace internal
} // end namespace Eigen
|
GeneralMatrixVector.h:
147
148
149
150
151
152
153
154
155
156
157 | alignmentPattern = NoneAligned;
}
else if(LhsPacketSize > 4)
{
// TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
// Currently, it seems to be better to perform unaligned loads anyway
alignmentPattern = NoneAligned;
}
else if (LhsPacketSize>1)
{
// eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
|
GeneralMatrixVector.h:
422
423
424
425
426
427
428
429
430
431
432
433 | alignmentPattern = NoneAligned;
}
else if(LhsPacketSize > 4)
{
// TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
alignmentPattern = NoneAligned;
}
else if (LhsPacketSize>1)
{
// eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize);
while (skipRows<LhsPacketSize &&
|
GeneralMatrixVector.h:
462
463
464
465
466
467
468
469
470
471 | Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
{
// FIXME: what is the purpose of this EIGEN_ALIGN_DEFAULT ??
EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
// this helps the compiler generate good binary code
const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0), lhs1 = lhs.getVectorMapper(i+offset1, 0),
lhs2 = lhs.getVectorMapper(i+2, 0), lhs3 = lhs.getVectorMapper(i+offset3, 0);
|
GeneralMatrixVector.h:
477
478
479
480
481
482
483
484
485
486
487 | ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)),
ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0));
// process initial unaligned coeffs
// FIXME this loop gets vectorized by the compiler!
for (Index j=0; j<alignedStart; ++j)
{
RhsScalar b = rhs(j, 0);
tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
}
|
GeneralMatrixVector.h:
552
553
554
555
556
557
558
559
560
561
562
563 | }
} // end explicit vectorization
// process remaining coeffs (or all if no explicit vectorization)
// FIXME this loop gets vectorized by the compiler!
for (Index j=alignedSize; j<depth; ++j)
{
RhsScalar b = rhs(j, 0);
tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
}
res[i*resIncr] += alpha*tmp0;
|
GeneralMatrixVector.h:
576
577
578
579
580
581
582
583
584
585
586
587 | EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
ResPacket ptmp0 = pset1<ResPacket>(tmp0);
const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
// process first unaligned result's coeffs
// FIXME this loop gets vectorized by the compiler!
for (Index j=0; j<alignedStart; ++j)
tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
if (alignedSize>alignedStart)
{
// process aligned rhs coeffs
if (lhs0.template aligned<LhsPacket>(alignedStart))
|
GeneralMatrixVector.h:
593
594
595
596
597
598
599
600
601
602
603
604 | tmp0 += predux(ptmp0);
}
// process remaining scalars
// FIXME this loop gets vectorized by the compiler!
for (Index j=alignedSize; j<depth; ++j)
tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
res[i*resIncr] += alpha*tmp0;
}
if (skipRows)
{
start = 0;
|
Parallelizer.h:
84
85
86
87
88
89
90
91
92
93
94 | template<bool Condition, typename Functor, typename Index>
void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
{
// TODO when EIGEN_USE_BLAS is defined,
// we should still enable OMP for other scalar types
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
// FIXME the transpose variable is only needed to properly split
// the matrix product when multithreading is enabled. This is a temporary
// fix to support row-major destination matrices. This whole
// parallelizer mechanism has to be redesigned anyway.
EIGEN_UNUSED_VARIABLE(depth);
|
Parallelizer.h:
87
88
89
90
91
92
93
94
95
96
97
98 | {
// TODO when EIGEN_USE_BLAS is defined,
// we should still enable OMP for other scalar types
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
// FIXME the transpose variable is only needed to properly split
// the matrix product when multithreading is enabled. This is a temporary
// fix to support row-major destination matrices. This whole
// parallelizer mechanism has to be redesigned anyway.
EIGEN_UNUSED_VARIABLE(depth);
EIGEN_UNUSED_VARIABLE(transpose);
func(0,rows, 0,cols);
#else
|
Parallelizer.h:
118
119
120
121
122
123
124
125
126
127
128
129 | Index threads = std::min<Index>(nbThreads(), pb_max_threads);
// if multi-threading is explicitly disabled, not useful, or if we already are in a parallel session,
// then abort multi-threading
// FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
return func(0,rows, 0,cols);
Eigen::initParallel();
func.initParallelSession(threads);
if(transpose)
|
TriangularMatrixMatrix_BLAS.h:
108
109
110
111
112
113
114
115
116
117
118
119 | \
/* Non-square case - doesn't fit BLAS ?TRMM. Fall back to the default triangular product or call BLAS ?GEMM*/ \
if (rows != depth) { \
\
/* FIXME handle mkl_domain_get_max_threads */ \
/*int nthr = mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS);*/ int nthr = 1;\
\
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
/* Most likely no benefit to call TRMM or GEMM from BLAS */ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
|
TriangularMatrixVector.h:
198
199
200
201
202
203
204
205
206
207
208
209 | } // end namespace internal
namespace internal {
// TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same.
template<int Mode> struct trmv_selector<Mode,ColMajor>
{
template<typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
typedef typename Lhs::Scalar LhsScalar;
|
TriangularMatrixVector.h:
225
226
227
228
229
230
231
232
233
234
235
236 | RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on the other hand, it is good for the cache to pack the vector anyway...
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
};
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
|
TriangularSolverMatrix.h:
114
115
116
117
118
119
120
121
122
123
124
125 | Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
// tr solve
for (Index k=0; k<actualPanelWidth; ++k)
{
// TODO write a small kernel handling this (can be shared with trsv)
Index i = IsLower ? k2+k1+k : k2-k1-k-1;
Index rs = actualPanelWidth - k - 1; // remaining size
Index s = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1)
: IsLower ? i+1 : i-rs;
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
for (Index j=j2; j<j2+actual_cols; ++j)
|
Constants.h:
251
252
253
254
255
256
257
258
259
260
261
262 | };
/** \ingroup enums
* Enum used by DenseBase::corner() in Eigen2 compatibility mode. */
// FIXME after the corner() API change, this was not needed anymore, except by AlignedBox
// TODO: find out what to do with that. Adapt the AlignedBox API?
enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
/** \ingroup enums
* Enum containing possible values for the \p Direction parameter of
* Reverse, PartialReduxExpr and VectorwiseOp. */
enum DirectionType {
|
Constants.h:
252
253
254
255
256
257
258
259
260
261
262 | /** \ingroup enums
* Enum used by DenseBase::corner() in Eigen2 compatibility mode. */
// FIXME after the corner() API change, this was not needed anymore, except by AlignedBox
// TODO: find out what to do with that. Adapt the AlignedBox API?
enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
/** \ingroup enums
* Enum containing possible values for the \p Direction parameter of
* Reverse, PartialReduxExpr and VectorwiseOp. */
enum DirectionType {
/** For Reverse, all columns are reversed;
|
Macros.h:
133
134
135
136
137
138
139
140
141
142 | #define EIGEN_GNUC_AT_MOST(x,y) 0
#define EIGEN_GNUC_AT(x,y) 0
#endif
// FIXME: could probably be removed as we do not support gcc 3.x anymore
#if EIGEN_COMP_GNUC && (__GNUC__ <= 3)
#define EIGEN_GCC3_OR_OLDER 1
#else
#define EIGEN_GCC3_OR_OLDER 0
#endif
|
Macros.h:
497
498
499
500
501
502
503
504
505
506
// EIGEN_ALWAYS_INLINE is the strongest: it has the effect of making the function inline and adding every possible
// attribute to maximize inlining. This should only be used when really necessary: in particular,
// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
// FIXME with the always_inline attribute,
// gcc 3.4.x and 4.1 reports the following compilation error:
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
// : function body not available
// See also bug 1367
#if EIGEN_GNUC_AT_LEAST(4,2)
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
#else
|
Macros.h:
652
653
654
655
656
657
658
659
660
661
662
663 | #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#elif EIGEN_COMP_MSVC
#define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
#elif EIGEN_COMP_SUNCC
// FIXME not sure about this one:
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#else
#error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
#endif
// If the user explicitly disable vectorization, then we also disable alignment
#if defined(EIGEN_DONT_VECTORIZE)
|
Macros.h:
865
866
867
868
869
870
871
872
873
874
875
876 | using Base::derived; \
using Base::const_cast_derived;
// FIXME Maybe the EIGEN_DENSE_PUBLIC_INTERFACE could be removed as importing PacketScalar is rarely needed
#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
typedef typename Base::PacketScalar PacketScalar;
#define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
#define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)
|
Meta.h:
454
455
456
457
458
459
460
461
462
463
464 | {
enum { Defined = 0 };
};
// FIXME quick workaround around current limitation of result_of
// template<typename Scalar, typename ArgType0, typename ArgType1>
// struct result_of<scalar_product_op<Scalar>(ArgType0,ArgType1)> {
// typedef typename scalar_product_traits<typename remove_all<ArgType0>::type, typename remove_all<ArgType1>::type>::ReturnType type;
// };
} // end namespace internal
|
StaticAssert.h:
210
211
212
213
214
215
216
217
218 | >::value), \
YOU_CANNOT_MIX_ARRAYS_AND_MATRICES)
// Check that a cost value is positive, and that is stay within a reasonable range
// TODO this check could be enabled for internal debugging only
#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \
EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE);
#endif // EIGEN_STATIC_ASSERT_H
|
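One way to realize this TODO is to gate the macro behind Eigen's existing EIGEN_INTERNAL_DEBUGGING switch (an assumption that this switch is the intended opt-in):

    #ifdef EIGEN_INTERNAL_DEBUGGING
    #define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \
          EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE);
    #else
    #define EIGEN_INTERNAL_CHECK_COST_VALUE(C) // no-op in user builds
    #endif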
XprHelper.h:
11
12
13
14
15
16
17
18
19
20
21
22 | #ifndef EIGEN_XPRHELPER_H
#define EIGEN_XPRHELPER_H
// just a workaround because GCC seems to not really like empty structs
// FIXME: gcc 4.3 generates bad code when strict-aliasing is enabled
// so currently we simply disable this optimization for gcc 4.3
#if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3)
#define EIGEN_EMPTY_STRUCT_CTOR(X) \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {}
#else
#define EIGEN_EMPTY_STRUCT_CTOR(X)
|
XprHelper.h:
251
252
253
254
255
256
257
258
259
260
261
262 | class compute_matrix_flags
{
enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 };
public:
// FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<>
// and then propagate this information to the evaluator's flags.
// However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage.
enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
};
template<int _Rows, int _Cols> struct size_at_compile_time
{
|
AutoDiffVector.h:
59
60
61
62
63
64
65
66
67
68
69 | const CoeffType coeffRef(Index i) const { return CoeffType(m_values[i], m_jacobian.col(i)); }
Index size() const { return m_values.size(); }
// FIXME here we could return an expression of the sum
Scalar sum() const { /*std::cerr << "sum \n\n";*/ /*std::cerr << m_jacobian.rowwise().sum() << "\n\n";*/ return Scalar(m_values.sum(), m_jacobian.rowwise().sum()); }
inline AutoDiffVector(const ValueType& values, const JacobianType& jac)
: m_values(values), m_jacobian(jac)
{}
|
EulerAngles.h:
294
295
296
297
298
299
300
301
302
303
304
305 | }
/*EulerAngles& fromQuaternion(const QuaternionType& q)
{
// TODO: Implement it in a faster way for quaternions
// According to http://www.euclideanspace.com/maths/geometry/rotations/conversions/quaternionToEuler/
// we can compute only the needed matrix cells and then convert to euler angles. (see ZYX example below)
// Currently we compute all matrix cells from quaternion.
// Special case only for ZYX
//Scalar y2 = q.y() * q.y();
//m_angles[0] = std::atan2(2*(q.w()*q.z() + q.x()*q.y()), (1 - 2*(y2 + q.z()*q.z())));
|
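Completing the commented-out ZYX special case with the standard quaternion-to-Tait-Bryan formulas gives a sketch like the one below; the sign and range conventions would still have to be checked against EulerSystem before wiring it in:

    #include <cmath>
    #include <Eigen/Geometry>

    // ZYX (yaw-pitch-roll) angles from a unit quaternion, textbook form.
    Eigen::Vector3d zyxFromQuaternion(const Eigen::Quaterniond& q)
    {
      const double y2 = q.y() * q.y();
      Eigen::Vector3d a;
      a[0] = std::atan2(2*(q.w()*q.z() + q.x()*q.y()), 1 - 2*(y2 + q.z()*q.z())); // Z
      a[1] = std::asin (2*(q.w()*q.y() - q.z()*q.x()));                           // Y
      a[2] = std::atan2(2*(q.w()*q.x() + q.y()*q.z()), 1 - 2*(q.x()*q.x() + y2)); // X
      return a;
    }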
EulerAngles.h:
315
316
317
318
319
320
321
322
323
324
325
326 | System::CalcEulerAngles(*this, m);
return *this;
}
// TODO: Assign and construct from another EulerAngles (with different system)
/** Set \c *this from a rotation. */
template<typename Derived>
EulerAngles& operator=(const RotationBase<Derived, 3>& rot) {
System::CalcEulerAngles(*this, rot.toRotationMatrix());
return *this;
}
|
EulerAngles.h:
324
325
326
327
328
329
330
331
332
333
334 | System::CalcEulerAngles(*this, rot.toRotationMatrix());
return *this;
}
// TODO: Support isApprox function
/** \returns an equivalent 3x3 rotation matrix. */
Matrix3 toRotationMatrix() const
{
return static_cast<QuaternionType>(*this).toRotationMatrix();
}
|
EulerSystem.h:
17
18
19
20
21
22
23
24
25
26
27
28 | class EulerAngles;
namespace internal
{
// TODO: Check if this already exists in the rest of the API
template <int Num, bool IsPositive = (Num > 0)>
struct Abs
{
enum { value = Num };
};
template <int Num>
|
EulerSystem.h:
174
175
176
177
178
179
180
181
182
183
184 | J = (AlphaAxisAbs - 1 + 1 + IsOdd)%3,
K = (AlphaAxisAbs - 1 + 2 - IsOdd)%3
};
// TODO: Get the @mat parameter in a form that avoids double evaluation.
template <typename Derived>
static void CalcEulerAngles_imp(Matrix<typename MatrixBase<Derived>::Scalar, 3, 1>& res, const MatrixBase<Derived>& mat, internal::true_type /*isTaitBryan*/)
{
using std::atan2;
using std::sin;
using std::cos;
|
ei_kissfft_impl.h:
387
388
389
390
391
392
393
394
395
396
397 | inline
PlanData & get_plan(int nfft, bool inverse)
{
// TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles
PlanData & pd = m_plans[ PlanKey(nfft,inverse) ];
if ( pd.m_twiddles.size() == 0 ) {
pd.make_twiddles(nfft,inverse);
pd.factorize(nfft);
}
return pd;
}
|
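The lookup this TODO describes could take the following shape, assuming m_plans is a std::map-like container with a PlanMap typedef, and that the twiddles of the two directions are conjugates of each other (member names follow the excerpt; untested):

    PlanData & pd = m_plans[ PlanKey(nfft,inverse) ];
    if ( pd.m_twiddles.size() == 0 ) {
      // try to reuse the opposite-direction plan before computing from scratch
      typename PlanMap::iterator other = m_plans.find( PlanKey(nfft, !inverse) );
      if (other != m_plans.end() && other->second.m_twiddles.size() != 0) {
        pd = other->second;                               // reuse the factorization
        for (std::size_t k = 0; k < pd.m_twiddles.size(); ++k)
          pd.m_twiddles[k] = std::conj(pd.m_twiddles[k]); // flip the direction
      } else {
        pd.make_twiddles(nfft,inverse);
        pd.factorize(nfft);
      }
    }
    return pd;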
ConstrainedConjGrad.h:
48
49
50
51
52
53
54
55
56
57
58
59 | {
// optimizable: copy of the row, precompute C * trans(C).
typedef typename CMatrix::Scalar Scalar;
typedef typename CMatrix::Index Index;
// FIXME use sparse vectors ?
typedef Matrix<Scalar,Dynamic,1> TmpVec;
Index rows = C.rows(), cols = C.cols();
TmpVec d(rows), e(rows), l(cols), p(rows), q(rows), r(rows);
Scalar rho, rho_1, alpha;
d.setZero();
|
ConstrainedConjGrad.h:
82
83
84
85
86
87
88
89
90
91
92
93 | p = (rho/rho_1) * p + r;
}
l = C.transpose() * e; // l is the i-th row of CINV
// FIXME add a generic "prune/filter" expression for both dense and sparse object to sparse
for (Index j=0; j<l.size(); ++j)
if (l[j]<1e-15)
tripletList.push_back(T(i,j,l(j)));
d[i] = 0.0;
}
|
ConstrainedConjGrad.h:
144
145
146
147
148
149
150
151
152
153
154 | transition = true;
}
Scalar bb = CINV.row(i).dot(z);
if (bb > 0.0)
// FIXME: we should allow that: z += -bb * C.row(i);
for (typename CMatrix::InnerIterator it(C,i); it; ++it)
z.coeffRef(it.index()) -= bb*it.value();
}
else
satured[i] = false;
}
|
DGMRES.h:
32
33
34
35
36
37
38
39
40
41
42
43 | * \param perm gives the sorted sequence on output. Must be initialized with 0..n-1
* \param ncut Put the ncut smallest elements at the end of the vector
* WARNING This is an expensive sort, so it should be used only
* for small vectors
* TODO Use modified QuickSplit or std::nth_element to get the smallest values
*/
template <typename VectorType, typename IndexType>
void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType::Scalar& ncut)
{
eigen_assert(vec.size() == perm.size());
bool flag;
for (Index k = 0; k < ncut; k++)
|
DGMRES.h:
322
323
324
325
326
327
328
329
330
331
332
333 | m_V.col(it+1) = tv1/coef;
m_H(it+1, it) = coef;
// m_Hes(it+1,it) = coef;
// FIXME Check for happy breakdown
// Update Hessenberg matrix with Givens rotations
for (Index i = 1; i <= it; ++i)
{
m_H.col(it).applyOnTheLeft(i-1,i,gr[i-1].adjoint());
}
// Compute the new plane rotation
|
DGMRES.h:
350
351
352
353
354
355
356
357
358
359
360
361 | }
// Compute the new coefficients by solving the least square problem
// it++;
//FIXME Check first if the matrix is singular ... zero diagonal
DenseVector nrs(m_restart);
nrs = m_H.topLeftCorner(it,it).template triangularView<Upper>().solve(g.head(it));
// Form the new solution
if (m_isDeflInitialized)
{
tv1 = m_V.leftCols(it) * nrs;
|
DGMRES.h:
493
494
495
496
497
498
499
500
501
502
503 | // Factorize m_T into m_luT
m_luT.compute(m_T.topLeftCorner(m_r, m_r));
//FIXME Check if the factorization was correctly done (nonsingular matrix)
m_isDeflInitialized = true;
return 0;
}
template<typename _MatrixType, typename _Preconditioner>
template<typename RhsType, typename DestType>
Index DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x, DestType &y) const
{
|
GMRES.h:
106
107
108
109
110
111
112
113
114
115
116 | v = VectorType::Unit(m, k - 1);
// apply Householder reflections H_{1} ... H_{k-1} to v
// TODO: use a HouseholderSequence
for (Index i = k - 1; i >= 0; --i) {
v.tail(m - i).applyHouseholderOnTheLeft(H.col(i).tail(m - i - 1), tau.coeffRef(i), workspace.data());
}
// apply matrix M to v: v = mat * v;
t.noalias() = mat * v;
v = precond.solve(t);
|
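Both reflection loops (this one and the reverse-order one in the next excerpt) map directly onto Eigen's HouseholderSequence: the essential vectors sit below the diagonal of H with coefficients tau, which is exactly the layout householderSequence() expects. A hedged sketch, one line per loop (untested; for complex scalars the transpose/adjoint choice would need checking):

    // descending loop: v <- H_0 * ... * H_{k-1} * v
    v.applyOnTheLeft(Eigen::householderSequence(H.leftCols(k), tau.head(k)));
    // ascending loop:  v <- H_{k-1} * ... * H_0 * v
    v.applyOnTheLeft(Eigen::householderSequence(H.leftCols(k), tau.head(k)).transpose());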
GMRES.h:
116
117
118
119
120
121
122
123
124
125
126
127 | t.noalias() = mat * v;
v = precond.solve(t);
// apply Householder reflections H_{k-1} ... H_{1} to v
// TODO: use a HouseholderSequence
for (Index i = 0; i < k; ++i) {
v.tail(m - i).applyHouseholderOnTheLeft(H.col(i).tail(m - i - 1), tau.coeffRef(i), workspace.data());
}
if (v.tail(m - k).norm() != 0.0)
{
if (k <= restart)
|
Scaling.h:
65
66
67
68
69
70
71
72
73
74
75 | /**
* Compute the left and right diagonal matrices to scale the input matrix @p mat
*
* FIXME This algorithm will be modified such that the diagonal elements are permuted on the diagonal.
*
* \sa LeftScaling() RightScaling()
*/
void compute (const MatrixType& mat)
{
using std::abs;
int m = mat.rows();
|
KroneckerTensorProduct.h:
163
164
165
166
167
168
169
170
171
172
173
174 | typedef Eigen::InnerIterator<Rhs1Cleaned> RhsInnerIterator;
// compute number of non-zeros per innervectors of dst
{
// TODO VectorXi is not necessarily big enough!
VectorXi nnzA = VectorXi::Zero(Dest::IsRowMajor ? m_A.rows() : m_A.cols());
for (Index kA=0; kA < m_A.outerSize(); ++kA)
for (LhsInnerIterator itA(lhs1,kA); itA; ++itA)
nnzA(Dest::IsRowMajor ? itA.row() : itA.col())++;
VectorXi nnzB = VectorXi::Zero(Dest::IsRowMajor ? m_B.rows() : m_B.cols());
for (Index kB=0; kB < m_B.outerSize(); ++kB)
|
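A sketch of the fix this TODO implies: size the per-vector counters with the destination's StorageIndex (or Eigen::Index) instead of a fixed VectorXi:

    typedef Eigen::Matrix<typename Dest::StorageIndex, Eigen::Dynamic, 1> IndexVector;
    IndexVector nnzA = IndexVector::Zero(Dest::IsRowMajor ? m_A.rows() : m_A.cols());
    IndexVector nnzB = IndexVector::Zero(Dest::IsRowMajor ? m_B.rows() : m_B.cols());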
LMpar.h:
60
61
62
63
64
65
66
67
68
69
70
71 | // const Index rank = qr.nonzeroPivots(); // exactly double(0.)
const Index rank = qr.rank(); // use a threshold
wa1 = qtb;
wa1.tail(n-rank).setZero();
//FIXME There is no solve in place for sparse triangularView
wa1.head(rank) = s.topLeftCorner(rank,rank).template triangularView<Upper>().solve(qtb.head(rank));
x = qr.colsPermutation()*wa1;
/* initialize the iteration counter. */
/* evaluate the function at the origin, and test */
/* for acceptance of the gauss-newton direction. */
|
LevenbergMarquardt.h:
298
299
300
301
302
303
304
305
306
307 | m_wa1.resize(n); m_wa2.resize(n); m_wa3.resize(n);
m_wa4.resize(m);
m_fvec.resize(m);
//FIXME Sparse Case : Allocate space for the jacobian
m_fjac.resize(m, n);
// m_fjac.reserve(VectorXi::Constant(n,5)); // FIXME Find a better alternative
if (!m_useExternalScaling)
m_diag.resize(n);
eigen_assert( (!m_useExternalScaling || m_diag.size()==n) && "When m_useExternalScaling is set, the caller must provide a valid 'm_diag'");
m_qtf.resize(n);
|
MatrixFunction.h:
62
63
64
65
66
67
68
69
70
71
72 | template <typename MatrixType>
MatrixType MatrixFunctionAtomic<MatrixType>::compute(const MatrixType& A)
{
// TODO: Use that A is upper triangular
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename MatrixType::Index Index;
Index rows = A.rows();
Scalar avgEival = A.trace() / Scalar(RealScalar(rows));
MatrixType Ashifted = A - avgEival * MatrixType::Identity(rows, rows);
RealScalar mu = matrix_function_compute_mu(Ashifted);
MatrixType F = m_f(avgEival, 0) * MatrixType::Identity(rows, rows);
|
MatrixPower.h:
26
27
28
29
30
31
32
33
34
35
36
37 | * should not be changed in the meantime). It is the return type of
* MatrixPower::operator() and related functions and most of the
* time this is the only way it is used.
*/
/* TODO This class is only used by MatrixPower, so it should be nested
* into MatrixPower, like MatrixPower::ReturnValue. However, my
* compiler complained about an unused template parameter in the
* following declaration in namespace internal.
*
* template<typename MatrixType>
* struct traits<MatrixPower<MatrixType>::ReturnValue>;
*/
|
MatrixSquareRoot.h:
18
19
20
21
22
23
24
25
26
27
28
29 | // post: sqrtT.block(i,i,2,2) is square root of T.block(i,i,2,2)
template <typename MatrixType, typename ResultType>
void matrix_sqrt_quasi_triangular_2x2_diagonal_block(const MatrixType& T, typename MatrixType::Index i, ResultType& sqrtT)
{
// TODO: This case (2-by-2 blocks with complex conjugate eigenvalues) is probably hidden somewhere
// in EigenSolver. If we expose it, we could call it directly from here.
typedef typename traits<MatrixType>::Scalar Scalar;
Matrix<Scalar,2,2> block = T.template block<2,2>(i,i);
EigenSolver<Matrix<Scalar,2,2> > es(block);
sqrtT.template block<2,2>(i,i)
= (es.eigenvectors() * es.eigenvalues().cwiseSqrt().asDiagonal() * es.eigenvectors().inverse()).real();
}
|
HybridNonLinearSolver.h:
64
65
66
67
68
69
70
71
72
73
74 | Scalar epsfcn;
};
typedef Matrix< Scalar, Dynamic, 1 > FVectorType;
typedef Matrix< Scalar, Dynamic, Dynamic > JacobianType;
/* TODO: if Eigen provides a triangular storage, use it here */
typedef Matrix< Scalar, Dynamic, Dynamic > UpperTriangularType;
HybridNonLinearSolverSpace::Status hybrj1(
FVectorType &x,
const Scalar tol = std::sqrt(NumTraits<Scalar>::epsilon())
);
|
LevenbergMarquardt.h:
465
466
467
468
469
470
471
472
473
474
475
476 | }
permutation.setIdentity(n);
if (sing) {
wa2 = fjac.colwise().blueNorm();
// TODO We have no unit test covering this code path, do not modify
// until it is carefully tested
ColPivHouseholderQR<JacobianType> qrfac(fjac);
fjac = qrfac.matrixQR();
wa1 = fjac.diagonal();
fjac.diagonal() = qrfac.hCoeffs();
permutation = qrfac.colsPermutation();
// TODO : avoid this:
|
LevenbergMarquardt.h:
472
473
474
475
476
477
478
479
480
481
482
483 | fjac = qrfac.matrixQR();
wa1 = fjac.diagonal();
fjac.diagonal() = qrfac.hCoeffs();
permutation = qrfac.colsPermutation();
// TODO : avoid this:
for(Index ii=0; ii< fjac.cols(); ii++) fjac.col(ii).segment(ii+1, fjac.rows()-ii-1) *= fjac(ii,ii); // rescale vectors
for (j = 0; j < n; ++j) {
if (fjac(j,j) != 0.) {
sum = 0.;
for (i = j; i < n; ++i)
sum += fjac(i,j) * qtf[i];
|
dogleg.h:
47
48
49
50
51
52
53
54
55
56
57
58 | qnorm = diag.cwiseProduct(x).stableNorm();
if (qnorm <= delta)
return;
// TODO : this path is not tested by Eigen unit tests
/* the gauss-newton direction is not acceptable. */
/* next, calculate the scaled gradient direction. */
wa1.fill(0.);
for (j = 0; j < n; ++j) {
wa1.tail(n-j) += qrfac.row(j).tail(n-j) * qtb[j];
|
dogleg.h:
69
70
71
72
73
74
75
76
77
78
79 | /* calculate the point along the scaled gradient */
/* at which the quadratic is minimized. */
wa1.array() /= (diag*gnorm).array();
// TODO : once unit tests cover this part:
// wa2 = qrfac.template triangularView<Upper>() * wa1;
for (j = 0; j < n; ++j) {
sum = 0.;
for (i = j; i < n; ++i) {
sum += qrfac(j,i) * wa1[i];
}
wa2[j] = sum;
|
qrsolv.h:
| namespace internal {
// TODO : once qrsolv2 is removed, use ColPivHouseholderQR or PermutationMatrix instead of ipvt
template <typename Scalar>
void qrsolv(
Matrix< Scalar, Dynamic, Dynamic > &s,
// TODO : use a PermutationMatrix once lmpar is no more:
const VectorXi &ipvt,
const Matrix< Scalar, Dynamic, 1 > &diag,
const Matrix< Scalar, Dynamic, 1 > &qtb,
|
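The PermutationMatrix form of the ipvt argument could look like the sketch below; the application direction (P versus P.transpose()) would need to be checked against the manual indexing loops it replaces:

    // wrap the 0-based index vector in a proper permutation object
    Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> P;
    P.indices() = ipvt;
    x = P * wa1;   // instead of indexing through ipvt by hand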
qrsolv.h:
5
6
7
8
9
10
11
12
13
14
15
16 | // TODO : once qrsolv2 is removed, use ColPivHouseholderQR or PermutationMatrix instead of ipvt
template <typename Scalar>
void qrsolv(
Matrix< Scalar, Dynamic, Dynamic > &s,
// TODO : use a PermutationMatrix once lmpar is no more:
const VectorXi &ipvt,
const Matrix< Scalar, Dynamic, 1 > &diag,
const Matrix< Scalar, Dynamic, 1 > &qtb,
Matrix< Scalar, Dynamic, 1 > &x,
Matrix< Scalar, Dynamic, 1 > &sdiag)
{
|
r1mpyq.h:
| namespace internal {
// TODO : move this to GivensQR once there's such a thing in Eigen
template <typename Scalar>
void r1mpyq(DenseIndex m, DenseIndex n, Scalar *a, const std::vector<JacobiRotation<Scalar> > &v_givens, const std::vector<JacobiRotation<Scalar> > &w_givens)
{
typedef DenseIndex Index;
/* apply the first set of givens rotations to a. */
|
NumericalDiff.h:
71
72
73
74
75
76
77
78
79
80
81
82 | const typename InputType::Index n = _x.size();
const Scalar eps = sqrt(((std::max)(epsfcn,NumTraits<Scalar>::epsilon() )));
ValueType val1, val2;
InputType x = _x;
// TODO : we should do this only if the size is not already known
val1.resize(Functor::values());
val2.resize(Functor::values());
// initialization
switch(mode) {
case Forward:
// compute f(x)
|
SkylineInplaceLU.h:
335
336
337
338
339
340
341
342
343 | while (uIt) {
x->coeffRef(uIt.row()) -= x_col * uIt.value();
//TODO : introduce --operator
uIt += -1;
}
}
x->coeffRef(0) = x->coeff(0) / m_lu.coeffDiag(0);
|
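The decrement operator this TODO asks for is a small addition to the iterator; a hypothetical sketch mirroring the existing operator+=, with the cursor member name assumed:

    // hypothetical member of the skyline inner iterator
    InnerUpperIterator& operator--() {
      m_id -= 1;      // assumption: m_id is the cursor that operator+= advances
      return *this;
    }
    // the loop above would then read:  --uIt;  instead of:  uIt += -1;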
SkylineMatrix.h:
579
580
581
582
583
584
585
586
587
588
589
590 | m_data.squeeze();
}
void prune(Scalar reference, RealScalar epsilon = dummy_precision<RealScalar > ()) {
//TODO
}
/** Resizes the matrix to a \a rows x \a cols matrix and initializes it to zero
* \sa resizeNonZeros(Index), reserve(), setZero()
*/
void resize(size_t rows, size_t cols) {
const Index diagSize = rows > cols ? cols : rows;
|
SkylineMatrix.h:
673
674
675
676
677
678
679
680
681
682
683 | template<typename OtherDerived>
inline SkylineMatrix & operator=(const SkylineMatrixBase<OtherDerived>& other) {
const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
if (needToTranspose) {
// TODO
// return *this;
} else {
// there is no special optimization
return SkylineMatrixBase<SkylineMatrix>::operator=(other.derived());
}
}
|
SkylineMatrixBase.h:
144
145
146
147
148
149
150
151
152
153
154 | return derived();
}
SkylineMatrixBase() : m_isRValue(false) {
/* TODO check flags */
}
inline Derived & operator=(const Derived& other) {
this->operator=<Derived > (other);
return derived();
}
|
SkylineMatrixBase.h:
165
166
167
168
169
170
171
172
173
174
175
176 | }
template<typename OtherDerived>
inline Derived & operator=(const SkylineMatrixBase<OtherDerived>& other) {
//TODO
}
template<typename Lhs, typename Rhs>
inline Derived & operator=(const SkylineProduct<Lhs, Rhs, SkylineTimeSkylineProduct>& product);
friend std::ostream & operator <<(std::ostream & s, const SkylineMatrixBase& m) {
s << m.derived();
|
SkylineStorage.h:
194
195
196
197
198
199
200
201
202
203
204
205 | memset(m_lowerProfile, 0, m_diagSize * sizeof (Index));
}
void prune(Scalar reference, RealScalar epsilon = dummy_precision<RealScalar>()) {
//TODO
}
protected:
inline void reallocate(Index diagSize, Index upperProfileSize, Index lowerProfileSize, Index upperSize, Index lowerSize) {
Scalar* diag = new Scalar[diagSize];
|
BlockSparseMatrix.h:
508
509
510
511
512
513
514
515
516
517
518
519 | {
//Browse each outer block
//First, copy and save the indices of nonzero blocks
//FIXME : find a way to avoid this ...
std::vector<int> nzBlockIdx;
typename MatrixType::InnerIterator it(blockPattern, bj);
for(; it; ++it)
{
nzBlockIdx.push_back(it.index());
}
std::sort(nzBlockIdx.begin(), nzBlockIdx.end());
|
BlockSparseMatrix.h:
597
598
599
600
601
602
603
604
605
606
607
608 | {
eigen_assert((m_innerBSize != 0 && m_outerBSize != 0) &&
"TRYING TO RESERVE ZERO-SIZE MATRICES, CALL resize() first");
//FIXME Should free if already allocated
m_outerIndex = new StorageIndex[m_outerBSize+1];
m_nonzerosblocks = nonzerosblocks;
if(m_blockSize != Dynamic)
{
m_nonzeros = nonzerosblocks * (m_blockSize * m_blockSize);
m_blockPtr = 0;
|
BlockSparseMatrix.h:
624
625
626
627
628
629
630
631
632
633
634
635 | * The InputIterator class should provide the functions row(), col() and value()
*
* \note For fixed-size blocks, call setBlockSize() before this function.
*
* FIXME Do not accept duplicates
*/
template<typename InputIterator>
void setFromTriplets(const InputIterator& begin, const InputIterator& end)
{
eigen_assert((m_innerBSize!=0 && m_outerBSize !=0) && "ZERO BLOCKS, PLEASE CALL resize() before");
/* First, sort the triplet list
|
BlockSparseMatrix.h:
632
633
634
635
636
637
638
639
640
641
642
643 | {
eigen_assert((m_innerBSize!=0 && m_outerBSize !=0) && "ZERO BLOCKS, PLEASE CALL resize() before");
/* First, sort the triplet list
* FIXME This can be unnecessarily expensive since only the inner indices have to be sorted
* The best approach is like in SparseMatrix::setFromTriplets()
*/
internal::TripletComp<InputIterator, IsColMajor> tripletcomp;
std::sort(begin, end, tripletcomp);
/* Count the number of rows and column blocks,
* and the number of nonzero blocks per outer dimension
|
BlockSparseMatrix.h:
820
821
822
823
824
825
826
827
828
829
830
831 | return Map<BlockScalar>(&(m_values[blockPtr(offset)]), rsize, csize);
}
else
{
//FIXME the block does not exist; insert it!
eigen_assert(false && "DYNAMIC INSERTION IS NOT YET SUPPORTED");
}
}
/**
* \returns the value of the (i,j) block as an Eigen Dense Matrix
*/
|
BlockSparseMatrix.h:
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013 | // block row index
inline Index row() const {return index(); }
// block column index
inline Index col() const {return outer(); }
// FIXME Number of rows in the current block
inline Index rows() const { return (m_mat.m_blockSize==Dynamic) ? (m_mat.m_innerOffset[index()+1] - m_mat.m_innerOffset[index()]) : m_mat.m_blockSize; }
// Number of columns in the current block ...
inline Index cols() const { return (m_mat.m_blockSize==Dynamic) ? (m_mat.m_outerOffset[m_outer+1]-m_mat.m_outerOffset[m_outer]) : m_mat.m_blockSize;}
inline operator bool() const { return (m_id < m_end); }
protected:
const BlockSparseMatrix<_Scalar, _BlockAtCompileTime, _Options, StorageIndex>& m_mat;
|
DynamicSparseMatrix.h:
59
60
61
62
63
64
65
66
67
68
69
70 | typedef SparseMatrixBase<DynamicSparseMatrix> Base;
using Base::convert_index;
public:
EIGEN_SPARSE_PUBLIC_INTERFACE(DynamicSparseMatrix)
// FIXME: why are these operators already available ???
// EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(DynamicSparseMatrix, +=)
// EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(DynamicSparseMatrix, -=)
typedef MappedSparseMatrix<Scalar,Flags> Map;
using Base::IsRowMajor;
using Base::operator=;
enum {
Options = _Options
|
DynamicSparseMatrix.h:
213
214
215
216
217
218
219
220
221
222
223
224 | const Index innerSize = IsRowMajor ? cols : rows;
if (m_innerSize>innerSize)
{
// remove all coefficients with innerCoord>=innerSize
// TODO
//std::cerr << "not implemented yet\n";
exit(2);
}
if (m_data.size() != outerSize)
{
m_data.resize(outerSize);
}
|
SpecialFunctionsImpl.h:
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397 | s += t1;
s += ai;
u = a * numext::log(x);
// TODO: gamma() is not directly implemented in Eigen.
/*
if ((a + b) < maxgam && numext::abs(u) < maxlog) {
t = gamma(a + b) / (gamma(a) * gamma(b));
s = s * t * pow(x, a);
} else {
*/
t = lgamma_impl<double>::run(a + b) - lgamma_impl<double>::run(a) -
|
SpecialFunctionsImpl.h:
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476 | x (1-x) | (a+b) / ( a | (a) | (b) ) . */
y = a * numext::log(x);
t = b * numext::log(xc);
// TODO: gamma is not directly implemented in Eigen.
/*
if ((a + b) < maxgam && numext::abs(y) < maxlog && numext::abs(t) < maxlog)
{
t = pow(xc, b);
t *= pow(x, a);
t /= a;
t *= w;
|
Complex.h:
82
83
84
85
86
87
88
89
90
91
92
93 | }
template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
{
// FIXME The following might be optimized using _mm256_movedup_pd
Packet2cf a = ploaddup<Packet2cf>(from);
Packet2cf b = ploaddup<Packet2cf>(from+1);
return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
}
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
|
MathFunctions.h:
113
114
115
116
117
118
119
120
121
122
123
124 | // Natural logarithm
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
// be easily approximated by a polynomial centered on m=1 for stability.
// TODO(gonnet): Further reduce the interval allowing for lower-degree
// polynomial interpolants -> ... -> profit!
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
plog<Packet8f>(const Packet8f& _x) {
Packet8f x = _x;
_EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
_EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
|
MathFunctions.h:
245
246
247
248
249
250
251
252
253
254
255
256 | #endif
Packet8f r2 = pmul(r, r);
// TODO(gonnet): Split into odd/even polynomials and try to exploit
// instruction-level parallelism.
Packet8f y = p8f_cephes_exp_p0;
y = pmadd(y, r, p8f_cephes_exp_p1);
y = pmadd(y, r, p8f_cephes_exp_p2);
y = pmadd(y, r, p8f_cephes_exp_p3);
y = pmadd(y, r, p8f_cephes_exp_p4);
y = pmadd(y, r, p8f_cephes_exp_p5);
|
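The odd/even split this TODO suggests rewrites p(r) = even(r^2) + r*odd(r^2), so the two Horner chains can issue in parallel. A sketch with the same coefficients as above, p0 being the leading one (untested):

    // odd(r^2)  = p0*r^4 + p2*r^2 + p4;  even(r^2) = p1*r^4 + p3*r^2 + p5
    Packet8f odd  = pmadd(p8f_cephes_exp_p0, r2, p8f_cephes_exp_p2);
    odd  = pmadd(odd,  r2, p8f_cephes_exp_p4);
    Packet8f even = pmadd(p8f_cephes_exp_p1, r2, p8f_cephes_exp_p3);
    even = pmadd(even, r2, p8f_cephes_exp_p5);
    Packet8f y = pmadd(odd, r, even);   // p(r) = even + r*odd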
MathFunctions.h:
331
332
333
334
335
336
337
338
339
340
341
342 | qx = pmadd(qx, x2, p4d_cephes_exp_q2);
qx = pmadd(qx, x2, p4d_cephes_exp_q3);
// I don't really get this bit, copied from the SSE2 routines, so...
// TODO(gonnet): Figure out what is going on here, perhaps find a better
// rational interpolant?
x = _mm256_div_pd(px, psub(qx, px));
x = pmadd(p4d_2, x, p4d_1);
// Build e=2^n by constructing the exponents in a 128-bit vector and
// shifting them to where they belong in double-precision values.
__m128i emm0 = _mm256_cvtpd_epi32(fx);
|
PacketMath.h:
220
221
222
223
224
225
226
227
228
229
// Loads 4 floats from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, a3}
template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
{
// TODO try to find a way to avoid the need of a temporary register
// Packet8f tmp = _mm256_castps128_ps256(_mm_loadu_ps(from));
// tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1);
// return _mm256_unpacklo_ps(tmp,tmp);
// _mm256_insertf128_ps is very slow on Haswell, thus:
Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
// mimic an "inplace" permutation of the lower 128bits using a blend
|
PacketMath.h:
353
354
355
356
357
358
359
360
361
362
363 | return _mm256_and_pd(a,mask);
}
// preduxp should be ok
// FIXME: why is this ok? why isn't the simple implementation working as expected?
template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
{
__m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
__m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
__m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
__m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
|
MathFunctions.h:
159
160
161
162
163
164
165
166
167
168
169
170 | _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
Packet16f r2 = pmul(r, r);
// TODO(gonnet): Split into odd/even polynomials and try to exploit
// instruction-level parallelism.
Packet16f y = p16f_cephes_exp_p0;
y = pmadd(y, r, p16f_cephes_exp_p1);
y = pmadd(y, r, p16f_cephes_exp_p2);
y = pmadd(y, r, p16f_cephes_exp_p3);
y = pmadd(y, r, p16f_cephes_exp_p4);
y = pmadd(y, r, p16f_cephes_exp_p5);
|
MathFunctions.h:
233
234
235
236
237
238
239
240
241
242
243
244 | qx = pmadd(qx, x2, p8d_cephes_exp_q2);
qx = pmadd(qx, x2, p8d_cephes_exp_q3);
// I don't really get this bit, copied from the SSE2 routines, so...
// TODO(gonnet): Figure out what is going on here, perhaps find a better
// rational interpolant?
x = _mm512_div_pd(px, psub(qx, px));
x = pmadd(p8d_2, x, p8d_1);
// Build e=2^n.
const Packet8d e = _mm512_castsi512_pd(_mm512_slli_epi64(
_mm512_add_epi64(_mm512_cvtpd_epi64(n), _mm512_set1_epi64(1023)), 52));
PacketMath.h: 81-92
HasDiv = 1
};
};
/* TODO Implement AVX512 for integers
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet16i type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=8
Complex.h: 227-238
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for AltiVec
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b);
Packet4f s = pmul<Packet4f>(b.v, b.v);
return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
}
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
{
Complex.h: 362-373
struct palign_impl<Offset,Packet1cd>
{
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
{
// FIXME is it sure we never have to align a Packet1cd?
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
Complex.h: 404-415
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for AltiVec
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
Packet2d s = pmul<Packet2d>(b.v, b.v);
return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
}
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
{
Complex.h: 268-278
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for NEON
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
Packet4f s, rev_s;
// this computes the norm
s = vmulq_f32(b.v, b.v);
rev_s = vrev64q_f32(s);
Complex.h: 419-430
struct palign_impl<Offset,Packet1cd>
{
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
{
// FIXME is it sure we never have to align a Packet1cd?
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
Complex.h: 461-471
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for NEON
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
Packet2d s = pmul<Packet2d>(b.v, b.v);
Packet2d rev_s = preverse<Packet2d>(s);
return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
}
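All of these pdiv kernels vectorize the same conjugate trick, a/b = a * conj(b) / |b|^2; the vector code builds |b|^2 from an element-wise multiply plus a lane swap. A scalar sketch (helper name hypothetical):

    #include <complex>

    std::complex<double> pdiv_reference(const std::complex<double>& a,
                                        const std::complex<double>& b) {
      const std::complex<double> num = a * std::conj(b);            // a * conj(b)
      const double norm2 = b.real() * b.real() + b.imag() * b.imag();  // |b|^2
      return std::complex<double>(num.real() / norm2, num.imag() / norm2);
    }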
PacketMath.h: 108-119
size = 4,
HasHalfPacket=0, // Packet2f intrinsics not implemented yet
HasDiv = 1,
// FIXME check the Has*
HasSin = 0,
HasCos = 0,
HasLog = 0,
HasExp = 1,
HasSqrt = 0
};
};
PacketMath.h: 125-136
Vectorizable = 1,
AlignedOnScalar = 1,
size=4,
HasHalfPacket=0 // Packet2i intrinsics not implemented yet
// FIXME check the Has*
};
};
#if EIGEN_GNUC_AT_MOST(4,4) && !EIGEN_COMP_LLVM
// workaround gcc 4.2, 4.3 and 4.4 compilation issue
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
PacketMath.h: 337-346
template<> EIGEN_STRONG_INLINE void prefetch<float> (const float* addr) { EIGEN_ARM_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) { EIGEN_ARM_PREFETCH(addr); }
// FIXME only store the 2 first elements ?
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) { int32_t EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
float32x2_t a_lo, a_hi;
Packet4f a_r64;
PacketMath.h: 596-607
size = 2,
HasHalfPacket=0,
HasDiv = 1,
// FIXME check the Has*
HasSin = 0,
HasCos = 0,
HasLog = 0,
HasExp = 0,
HasSqrt = 0
};
};
PacketMath.h: 685-696
to[stride*1] = vgetq_lane_f64(from, 1);
}
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ARM_PREFETCH(addr); }
// FIXME only store the 2 first elements ?
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a, 0); }
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
Complex.h: 232-243
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for SSE3 and 4
Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
__m128 s = _mm_mul_ps(b.v,b.v);
return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
}
EIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x)
{
Complex.h: 309-319
template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
// FIXME force unaligned load, this is a temporary fix
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
Complex.h: 319-330
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
// FIXME force unaligned store, this is a temporary fix
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{
Complex.h: 354-365
struct palign_impl<Offset,Packet1cd>
{
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
{
// FIXME is it sure we never have to align a Packet1cd?
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
}
};
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
Complex.h: 417-428
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for SSE3 and 4
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
__m128d s = _mm_mul_pd(b.v,b.v);
return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
}
EIGEN_STRONG_INLINE Packet1cd pcplxflip/* <Packet1cd> */(const Packet1cd& x)
{
PacketMath.h: 168-179
#endif
#if EIGEN_COMP_MSVC==1500
// Workaround MSVC 9 internal compiler error.
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)).
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
#else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
PacketMath.h: 169-179
#if EIGEN_COMP_MSVC==1500
// Workaround MSVC 9 internal compiler error.
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)).
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps(from,from,from,from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
#else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
PacketMath.h: 607-618
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
{
// after some experiments, it seems this is the fastest way to implement it
// for GCC (e.g., reusing pmul is very slow!)
// TODO try to call _mm_mul_epu32 directly
EIGEN_ALIGN16 int aux[4];
pstore(aux, a);
return (aux[0] * aux[1]) * (aux[2] * aux[3]);
}
// min
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
Complex.h: 252-263
struct palign_impl<Offset,Packet1cd>
{
static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
{
// FIXME is it sure we never have to align a Packet1cd?
// Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
}
};
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
Complex.h: 340-351
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for AltiVec
Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
}
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
Complex.h: 348-359
}
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for AltiVec
Packet2cf res;
res.cd[0] = pdiv<Packet1cd>(a.cd[0], b.cd[0]);
res.cd[1] = pdiv<Packet1cd>(a.cd[1], b.cd[1]);
return res;
}
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
PacketMath.h: 336-347
};
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
{
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_LOAD
Packet *vfrom;
vfrom = (Packet *) from;
return vfrom->v4i;
}
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
PacketMath.h: 345-355
}
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
{
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_LOAD
Packet4f vfrom;
vfrom.v4f[0] = vec_ld2f(&from[0]);
vfrom.v4f[1] = vec_ld2f(&from[2]);
return vfrom;
}
PacketMath.h: 355-366
}
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
{
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_LOAD
Packet *vfrom;
vfrom = (Packet *) from;
return vfrom->v2d;
}
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
PacketMath.h: 364-375
}
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
{
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_STORE
Packet *vto;
vto = (Packet *) to;
vto->v4i = from;
}
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
PacketMath.h: 373-384
}
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
{
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_STORE
vec_st2f(from.v4f[0], &to[0]);
vec_st2f(from.v4f[1], &to[2]);
}
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
PacketMath.h: 382-391
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
{
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_STORE
Packet *vto;
vto = (Packet *) to;
vto->v2d = from;
}
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
TensorAssign.h: 113-124
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
{
// The dimensions of the lhs and the rhs tensors should be equal to prevent
// overflows and ensure the result is fully initialized.
// TODO: use left impl instead if right impl dimensions are known at compile time.
return m_rightImpl.dimensions();
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
m_leftImpl.evalSubExprsIfNeeded(NULL);
// If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non
TensorBroadcasting.h: 166-177
return coeffRowMajor(index);
}
}
// TODO: attempt to speed this up. The integer divisions and modulo are slow
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const
{
Index inputIndex = 0;
for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / m_outputStrides[i];
if (internal::index_statically_eq<Broadcast>(i, 1)) {
eigen_assert(idx < m_impl.dimensions()[i]);
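A scalar sketch of the divide/modulo chain this TODO wants to speed up: recovering per-dimension indices from a flat column-major index (stride layout assumed, strides[i] = product of the dimensions below i):

    #include <array>

    template <int NumDims>
    std::array<int, NumDims> unflatten(int index,
                                       const std::array<int, NumDims>& strides) {
      std::array<int, NumDims> idx{};
      for (int i = NumDims - 1; i > 0; --i) {
        idx[i] = index / strides[i];        // one division per dimension...
        index -= idx[i] * strides[i];       // ...plus the implied modulo
      }
      idx[0] = index;
      return idx;
    }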
TensorBroadcasting.h: 280-291
}
}
inputIndex += innermostLoc;
// Todo: this could be extended to the second dimension if we're not
// broadcasting alongside the first dimension, and so on.
if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) {
return m_impl.template packet<Unaligned>(inputIndex);
} else {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
values[0] = m_impl.coeff(inputIndex);
for (int i = 1; i < PacketSize; ++i) {
TensorBroadcasting.h: 332-343
}
}
inputIndex += innermostLoc;
// Todo: this could be extended to the second dimension if we're not
// broadcasting alongside the first dimension, and so on.
if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) {
return m_impl.template packet<Unaligned>(inputIndex);
} else {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
values[0] = m_impl.coeff(inputIndex);
for (int i = 1; i < PacketSize; ++i) {
TensorConcatenation.h: 175-185
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
// TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear?
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/)
{
m_leftImpl.evalSubExprsIfNeeded(NULL);
m_rightImpl.evalSubExprsIfNeeded(NULL);
return true;
}
TensorConcatenation.h: 189-200
m_leftImpl.cleanup();
m_rightImpl.cleanup();
}
// TODO(phli): attempt to speed this up. The integer divisions and modulo are slow.
// See CL/76180724 comments for more ideas.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
{
// Collect dimension-wise indices (subs).
array<Index, NumDims> subs;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
for (int i = NumDims - 1; i > 0; --i) {
TensorConcatenation.h: 243-253
return m_rightImpl.coeff(right_index);
}
}
// TODO(phli): Add a real vectorization.
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + packetSize - 1 < dimensions().TotalSize());
TensorContractionCuda.h: 429-439
// each do 8 writes into global memory. We can just overwrite the shared
// memory from the problem we just solved.
// (2) is slightly faster than (1) due to less branching and more ILP
// TODO: won't yield much gain, but could just use currently unused shared mem
// and then we won't have to sync
// wait for shared mem to be out of use
__syncthreads();
#define writeResultShmem(i, j) \
lhs_shmem[i + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j] = res(i, j); \
TensorContractionCuda.h: 465-476
const int max_j_write = numext::mini((int)((n_size - base_n - threadIdx.z + 7) / 8), 8);
if (threadIdx.x < max_i_write) {
if (max_j_write == 8) {
// TODO: can i trade bank conflicts for coalesced writes?
Scalar val0 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 0];
Scalar val1 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 1];
Scalar val2 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 2];
Scalar val3 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 3];
Scalar val4 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 4];
Scalar val5 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 5];
Scalar val6 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 6];
TensorContractionMapper.h: 344-355
typedef Self LinearMapper;
enum {
// We can use direct offsets iff the parent mapper supports them and we can compute the strides.
// TODO: we should also enable direct offsets for the Rhs case.
UseDirectOffsets = ParentMapper::DirectOffsets && (side == Lhs) && inner_dim_contiguous && (array_size<contract_t>::value > 0)
};
EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset)
: m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) {
// Bake the offsets into the buffer used by the base mapper whenever possible. This avoids the need to recompute
// this offset every time we attempt to access a coefficient.
TensorContractionThreadPool.h: 204-215
contractionCost(m, n, bm, bn, bk, shard_by_col, false);
int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
static_cast<double>(n) * m, cost, this->m_device.numThreads());
// TODO(dvyukov): this is a stop-gap to prevent regressions while the cost
// model is not tuned. Remove this when the cost model is tuned.
if (n == 1) num_threads = 1;
if (num_threads == 1) {
// The single-threaded algorithm should be faster in this case.
if (n == 1)
this->template evalGemv<lhs_inner_dim_contiguous,
TensorContractionThreadPool.h: 386-397
void run() {
// Kick off packing of the first slice.
signal_switch(0, 1);
// Wait for overall completion.
// TODO(dvyukov): this wait can lead to deadlock.
// If nthreads contractions are concurrently submitted from worker
// threads, this wait will block all worker threads and the system will
// deadlock.
done_.Wait();
}
private:
TensorContractionThreadPool.h: 770-781
rhs_inner_dim_reordered, Unaligned> RhsMapper;
typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
// TODO: packing could be faster sometimes if we supported row major tensor mappers
typedef internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, Traits::mr,
Traits::LhsProgress, ColMajor> LhsPacker;
typedef internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> RhsPacker;
// TODO: replace false, false with conjugate values?
typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper,
Traits::mr, Traits::nr, false, false> GebpKernel;
TensorContractionThreadPool.h: 775-786
typedef internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, Traits::mr,
Traits::LhsProgress, ColMajor> LhsPacker;
typedef internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> RhsPacker;
// TODO: replace false, false with conjugate values?
typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper,
Traits::mr, Traits::nr, false, false> GebpKernel;
typedef internal::packLhsArg<LhsScalar, LhsMapper, Index> packLArg;
typedef internal::packRhsAndKernelArg<LhsScalar, RhsScalar, RhsMapper, OutputMapper, Index> packRKArg;
// initialize data mappers
TensorContractionThreadPool.h: 826-836
blockAs.push_back(static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar))));
}
// To circumvent alignment issues, I'm just going to separately allocate the memory for each thread
// TODO: is this too much memory to allocate? This simplifies coding a lot, but is wasteful.
// Other options: (1) reuse memory when a thread finishes. con: tricky
// (2) allocate block B memory in each thread. con: overhead
MaxSizeVector<RhsScalar *> blockBs(n_blocks);
for (int i = 0; i < n_blocks; i++) {
blockBs.push_back(static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar))));
}
TensorConvolution.h: 1063-1074
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
costPerCoeff(bool vectorized) const {
// TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost
// model.
const double kernel_size = m_kernelImpl.dimensions().TotalSize();
// We ignore the use of fused multiply-add.
const double convolve_compute_cost =
TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>();
const double firstIndex_compute_cost =
NumDims *
TensorCostModel.h: 23-34
// Class storing the cost of evaluating a tensor expression in terms of the
// estimated number of operand bytes loaded, bytes stored, and compute cycles.
class TensorOpCost {
public:
// TODO(rmlarsen): Fix the scalar op costs in Eigen proper. Even a simple
// model based on minimal reciprocal throughput numbers from Intel or
// Agner Fog's tables would be better than what is there now.
template <typename ArgType>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost() {
return internal::functor_traits<
internal::scalar_product_op<ArgType, ArgType> >::Cost;
}
TensorCostModel.h: 92-103
bytes_loaded_ = 0;
bytes_stored_ = 0;
}
// TODO(rmlarsen): Define min in terms of total cost, not elementwise.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin(
const TensorOpCost& rhs) const {
double bytes_loaded = numext::mini(bytes_loaded_, rhs.bytes_loaded());
double bytes_stored = numext::mini(bytes_stored_, rhs.bytes_stored());
double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles());
return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles);
}
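A hedged sketch of what the TODO asks for: comparing operands by a single weighted total rather than elementwise. The struct, the unit weights, and the total() helper below are assumptions for illustration, not part of TensorOpCost's API:

    struct CostSketch {
      double bytes_loaded, bytes_stored, compute_cycles;
      double total() const {
        // Unit weights for illustration; a tuned model would weight each term.
        return bytes_loaded + bytes_stored + compute_cycles;
      }
    };

    inline CostSketch total_min(const CostSketch& a, const CostSketch& b) {
      return a.total() <= b.total() ? a : b;  // pick the cheaper operand whole
    }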
TensorCostModel.h: 101-112
double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles());
return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles);
}
// TODO(rmlarsen): Define max in terms of total cost, not elementwise.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax(
const TensorOpCost& rhs) const {
double bytes_loaded = numext::maxi(bytes_loaded_, rhs.bytes_loaded());
double bytes_stored = numext::maxi(bytes_stored_, rhs.bytes_stored());
double compute_cycles = numext::maxi(compute_cycles_, rhs.compute_cycles());
return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles);
}
TensorCostModel.h: 153-164
double bytes_stored_;
double compute_cycles_;
};
// TODO(rmlarsen): Implement a policy that chooses an "optimal" number of threads
// in [1:max_threads] instead of just switching multi-threading off for small
// work units.
template <typename Device>
class TensorCostModel {
public:
// Scaling from Eigen compute cost to device cycles.
static const int kDeviceCyclesPerComputeCycle = 1;
TensorCustomOp.h: 133-144
return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
// TODO(rmlarsen): Extend CustomOp API to return its cost estimate.
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
}
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; }
protected:
EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
TensorCustomOp.h: 288-299
return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
// TODO(rmlarsen): Extend CustomOp API to return its cost estimate.
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
}
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; }
protected:
EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
TensorDevice.h: 20-31
*
* Example:
* C.device(EIGEN_GPU) = A + B;
*
* Todo: operator *= and /=.
*/
template <typename ExpressionType, typename DeviceType> class TensorDevice {
public:
TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}
template<typename OtherDerived>
TensorDeviceCuda.h: 188-199
}
explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) {
eigen_assert(stream);
}
// TODO(bsteiner): This is an internal API, we should not expose it.
EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
return stream_->stream();
}
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
return stream_->allocate(num_bytes);
}
TensorDeviceCuda.h: 245-256
#endif
}
EIGEN_STRONG_INLINE size_t numThreads() const {
// FIXME
return 32;
}
EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
// FIXME
return 48*1024;
}
TensorDeviceCuda.h: 250-261
return 32;
}
EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
// FIXME
return 48*1024;
}
EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
// We won't try to take advantage of the l2 cache for the time being, and
// there is no l3 cache on cuda devices.
return firstLevelCacheSize();
TensorDeviceCuda.h: 318-329
(kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \
assert(cudaGetLastError() == cudaSuccess);
// FIXME: Should be device and kernel specific.
#ifdef __CUDACC__
static EIGEN_DEVICE_FUNC inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
#ifndef __CUDA_ARCH__
cudaError_t status = cudaDeviceSetSharedMemConfig(config);
EIGEN_UNUSED_VARIABLE(status)
assert(status == cudaSuccess);
#else
TensorDimensions.h: 104-114
EIGEN_DEVICE_FUNC Sizes() { }
template <typename DenseIndex>
explicit EIGEN_DEVICE_FUNC Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
// todo: add assertion
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template <typename... DenseIndex> EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { }
explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) {
// todo: add assertion
}
#endif
TensorDimensions.h: 109-120
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
template <typename... DenseIndex> EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { }
explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) {
// todo: add assertion
}
#endif
template <typename T> Sizes& operator = (const T& /*other*/) {
// add assertion failure if the size of other is different
return *this;
}
TensorDimensions.h: 166-176
Sizes() { }
template <typename DenseIndex>
explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
// todo: add assertion
}
template <typename T> Sizes& operator = (const T& /*other*/) {
// add assertion failure if the size of other is different
return *this;
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
TensorDimensions.h: 176-186
#if EIGEN_HAS_VARIADIC_TEMPLATES
template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { }
explicit Sizes(std::initializer_list<std::size_t>) {
// todo: add assertion
}
#else
EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) {
}
EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex) {
}
EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex) {
TensorEvaluator.h: 18-29
* \brief The tensor evaluator classes.
*
* These classes are responsible for the evaluation of the tensor expression.
*
* TODO: add support for more types of expressions, in particular expressions
* leading to lvalues (slicing, reshaping, etc...)
*/
// Generic evaluator
template<typename Derived, typename Device>
struct TensorEvaluator
{
TensorEvaluator.h: 388-399
typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
{
// TODO: use right impl instead if right impl dimensions are known at compile time.
return m_leftImpl.dimensions();
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
m_leftImpl.evalSubExprsIfNeeded(NULL);
m_rightImpl.evalSubExprsIfNeeded(NULL);
return true;
TensorEvaluator.h: 483-494
typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
{
// TODO: use arg2 or arg3 dimensions if they are known at compile time.
return m_arg1Impl.dimensions();
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
m_arg1Impl.evalSubExprsIfNeeded(NULL);
m_arg2Impl.evalSubExprsIfNeeded(NULL);
m_arg3Impl.evalSubExprsIfNeeded(NULL);
TensorEvaluator.h: 573-584
typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
{
// TODO: use then or else impl instead if they happen to be known at compile time.
return m_condImpl.dimensions();
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
m_condImpl.evalSubExprsIfNeeded(NULL);
m_thenImpl.evalSubExprsIfNeeded(NULL);
m_elseImpl.evalSubExprsIfNeeded(NULL);
TensorExpr.h: 80-91
template<typename UnaryOp, typename XprType>
struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> >
: traits<XprType>
{
// TODO(phli): Add InputScalar, InputPacket. Check references to
// current Scalar/Packet to see if the intent is Input or Output.
typedef typename result_of<UnaryOp(typename XprType::Scalar)>::type Scalar;
typedef traits<XprType> XprTraits;
typedef typename XprType::Nested XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
TensorExpr.h: 110-121
template<typename UnaryOp, typename XprType>
class TensorCwiseUnaryOp : public TensorBase<TensorCwiseUnaryOp<UnaryOp, XprType>, ReadOnlyAccessors>
{
public:
// TODO(phli): Add InputScalar, InputPacket. Check references to
// current Scalar/Packet to see if the intent is Input or Output.
typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef Scalar CoeffReturnType;
typedef typename Eigen::internal::nested<TensorCwiseUnaryOp>::type Nested;
typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Index Index;
TensorExpr.h: 142-153
struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
{
// Type promotion to handle the case where the types of the lhs and the rhs
// are different.
// TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to
// current Scalar/Packet to see if the intent is Inputs or Output.
typedef typename result_of<
BinaryOp(typename LhsXprType::Scalar,
typename RhsXprType::Scalar)>::type Scalar;
typedef traits<LhsXprType> XprTraits;
typedef typename promote_storage_type<
typename traits<LhsXprType>::StorageKind,
TensorExpr.h: 186-197
template<typename BinaryOp, typename LhsXprType, typename RhsXprType>
class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, ReadOnlyAccessors>
{
public:
// TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to
// current Scalar/Packet to see if the intent is Inputs or Output.
typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Scalar Scalar;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef Scalar CoeffReturnType;
typedef typename Eigen::internal::nested<TensorCwiseBinaryOp>::type Nested;
typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Index Index;
TensorFFT.h: 20-31
* \ingroup CXX11_Tensor_Module
*
* \brief Tensor FFT class.
*
* TODO:
* Vectorize the Cooley Tukey and the Bluestein algorithm
* Add support for multithreaded evaluation
* Improve the performance on GPU
*/
template <bool NeedUprade> struct MakeComplex {
template <typename T>
TensorFixedSize.h: 335-345
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TensorFixedSize& operator=(const TensorFixedSize& other)
{
// FIXME: check that the dimensions of other match the dimensions of *this.
// Unfortunately this isn't possible yet when the rhs is an expression.
typedef TensorAssignOp<Self, const TensorFixedSize> Assign;
Assign assign(*this, other);
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
template<typename OtherDerived>
TensorFixedSize.h: 346-356
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TensorFixedSize& operator=(const OtherDerived& other)
{
// FIXME: check that the dimensions of other match the dimensions of *this.
// Unfortunately this isn't possible yet when the rhs is an expression.
typedef TensorAssignOp<Self, const OtherDerived> Assign;
Assign assign(*this, other);
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
return *this;
}
TensorGenerator.h: 146-156
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
costPerCoeff(bool) const {
// TODO(rmlarsen): This is just a placeholder. Define interface to make
// generators return their cost.
return TensorOpCost(0, 0, TensorOpCost::AddCost<Scalar>() +
TensorOpCost::MulCost<Scalar>());
}
EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
TensorInflation.h: 183-193
return Scalar(0);
}
}
// TODO(yangke): optimize this function so that we can detect and produce
// all-zero packets
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
TensorMorphing.h: 280-291
const Sizes m_sizes;
};
// Fixme: figure out the exact threshold
namespace {
template <typename Index, typename Device> struct MemcpyTriggerForSlicing {
EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; }
private:
Index threshold_;
TensorRandom.h: 55-65
#endif
}
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) {
// TODO: Unify with the implementation in the non blocking thread pool.
uint64_t current = *state;
// Update the internal state
*state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
// Generate the random output (using the PCG-XSH-RS scheme)
return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
}
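The generator above is easy to lift out for experiments; a standalone copy of the same PCG-XSH-RS step with a small driver (seed arbitrary):

    #include <cstdint>
    #include <cstdio>

    // Same LCG state update and XSH-RS output function as the snippet above.
    std::uint32_t pcg_xsh_rs(std::uint64_t* state) {
      const std::uint64_t current = *state;
      *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
      return static_cast<std::uint32_t>(
          (current ^ (current >> 22)) >> (22 + (current >> 61)));
    }

    int main() {
      std::uint64_t state = 0x123456789abcdefULL;  // arbitrary seed
      for (int i = 0; i < 4; ++i) std::printf("%u\n", pcg_xsh_rs(&state));
      return 0;
    }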
TensorReduction.h: 605-616
return reducer.finalize(accum);
}
}
// TODO(bsteiner): provide a more efficient implementation.
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions())));
if (RunningOnGPU && m_result) {
TensorReverse.h: 197-208
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
// TODO(ndjaitly): write a better packing routine that uses
// local structure.
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
values[PacketSize];
for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index+i);
}
PacketReturnType rslt = internal::pload<PacketReturnType>(values);
TensorScan.h: 207-218
CoeffReturnType* m_output;
};
// CPU implementation of scan
// TODO(ibab) This single-threaded implementation should be parallelized,
// at least by running multiple scans at the same time.
template <typename Self, typename Reducer, typename Device>
struct ScanLauncher {
void operator()(Self& self, typename Self::CoeffReturnType *data) {
Index total_size = internal::array_prod(self.dimensions());
// We fix the index along the scan axis to 0 and perform a
TensorScan.h: 243-253
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
// GPU implementation of scan
// TODO(ibab) This placeholder implementation performs multiple scans in
// parallel, but it would be better to use a parallel scan algorithm and
// optimize memory access.
template <typename Self, typename Reducer>
__global__ void ScanKernel(Self self, Index total_size, typename Self::CoeffReturnType* data) {
// Compute offset as in the CPU version
Index val = threadIdx.x + blockIdx.x * blockDim.x;
Index offset = (val / self.stride()) * self.stride() * self.size() + val % self.stride();
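For orientation, a scalar reference of the inclusive scan both launchers implement, over one contiguous axis (illustration only):

    #include <cstddef>
    #include <vector>

    // Inclusive prefix sum: out[i] = in[0] + ... + in[i].
    std::vector<float> inclusive_scan_reference(const std::vector<float>& in) {
      std::vector<float> out(in.size());
      float acc = 0.0f;
      for (std::size_t i = 0; i < in.size(); ++i) {
        acc += in[i];
        out[i] = acc;
      }
      return out;
    }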
TensorTraits.h: 165-176
{
typedef const TensorRef<PlainObjectType>& type;
};
// TODO nested<> does not exist anymore in Eigen/Core, and it thus has to be removed in favor of ref_selector.
template<typename T, int n=1, typename PlainObject = void> struct nested
{
typedef typename ref_selector<T>::type type;
};
template <typename Scalar_, int NumIndices_, int Options_, typename IndexType_>
struct nested<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
NonBlockingThreadPool.h: 144-155
if (!t.f) {
t = Steal();
if (!t.f) {
// Leave one thread spinning. This reduces latency.
// TODO(dvyukov): 1000 iterations is based on fair dice roll, tune it.
// Also, the time it takes to attempt to steal work 1000 times depends
// on the size of the thread pool. However the speed at which the user
// of the thread pool submit tasks is independent of the size of the
// pool. Consider a time based limit instead.
if (!spinning_ && !spinning_.exchange(true)) {
for (int i = 0; i < 1000 && !t.f; i++) {
t = Steal();
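A sketch of the time-based limit this TODO proposes: spin until a wall-clock budget expires instead of running a fixed 1000 steal attempts, so the spin duration no longer scales with pool size. steal_once is a stand-in for Steal():

    #include <chrono>

    template <typename StealFn>
    bool spin_with_budget(StealFn steal_once, std::chrono::microseconds budget) {
      const auto deadline = std::chrono::steady_clock::now() + budget;
      while (std::chrono::steady_clock::now() < deadline) {
        if (steal_once()) return true;  // got a task within the budget
      }
      return false;  // budget exhausted, fall back to blocking
    }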
EmulateArray.h: 177-188
T dummy;
};
// Comparison operator
// Todo: implement !=, <, <=, >, and >=
template<class T, std::size_t N>
EIGEN_DEVICE_FUNC bool operator==(const array<T,N>& lhs, const array<T,N>& rhs) {
for (std::size_t i = 0; i < N; ++i) {
if (lhs[i] != rhs[i]) {
return false;
}
}
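A sketch of the comparisons the Todo asks for, written against std::array as a stand-in for the emulated array type:

    #include <array>
    #include <cstddef>

    template <class T, std::size_t N>
    bool array_not_equal(const std::array<T, N>& lhs, const std::array<T, N>& rhs) {
      for (std::size_t i = 0; i < N; ++i)
        if (lhs[i] != rhs[i]) return true;
      return false;
    }

    // Lexicographic <, using only operator< on T.
    template <class T, std::size_t N>
    bool array_less(const std::array<T, N>& lhs, const std::array<T, N>& rhs) {
      for (std::size_t i = 0; i < N; ++i) {
        if (lhs[i] < rhs[i]) return true;
        if (rhs[i] < lhs[i]) return false;
      }
      return false;  // equal
    }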
test_exceptions_greedy.py: 39-49
# Bad `m`
x = map[1, 3]
assert len(x) == 0
# TODO: It would be nice if `map[1, 3] = ...` raised
# an error, but currently it does nothing (silently).
# Bad type
with pytest.raises(ValueError) as e:
x = map[2.3, 5.7]
with pytest.raises(ValueError) as e:
map[2.3, 5.7] = 1
test_light_delay_greedy.py:
sec = starry.Secondary(starry.Map(), porb=1.0)
sys = starry.System(pri, sec, light_delay=True)
assert sys.light_delay is True
# TODO: Add tests here.
test_system_greedy.py:
sys = starry.System(pri, sec)
t = np.concatenate((np.linspace(0.1, 0.4, 50), np.linspace(0.6, 0.9, 50)))
flux = sys.flux(t)
# TODO: Add an analytic validation here
|