todos

There are 427 todo items that need to be addressed in starry.

kepler.py (lines 1060-1071):

        .. note::
            Users may call the :py:meth:`draw` method of this class to draw
            from the posterior after calling :py:meth:`solve`.
        """
        # TODO: Implement for spectral maps?
        self._no_spectral()

        # Check that the data is set
        if self._flux is None or self._C is None:
            raise ValueError("Please provide a dataset with `set_data()`.")

        # Get the full design matrix

kepler.py (lines 1217-1227):

        Returns:
            lnlike: The log marginal likelihood.
        """
        # TODO: Implement for spectral maps?
        self._no_spectral()

        # Check that the data is set
        if self._flux is None or self._C is None:
            raise ValueError("Please provide a dataset with `set_data()`.")

        # Get the full design matrix

maps.py (lines 2080-2091):

    ), "Differential rotation orders above 3 are not supported."
    if drorder > 0:
        assert ydeg > 0, "Differential rotation requires `ydeg` >= 1."

        # TODO: phase this next warning out
        logger.warning(
            "Differential rotation is still an experimental feature. "
            "Use it with care."
        )

        Ddeg = (4 * drorder + 1) * ydeg
        if Ddeg >= 50:
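
A quick check of the degree bound used above (a hedged sketch, not starry code): at the lowest supported differential rotation order, the threshold is already reached for ydeg = 10.

# Hedged sketch: evaluate the operator degree formula quoted above.
drorder, ydeg = 1, 10
Ddeg = (4 * drorder + 1) * ydeg
print(Ddeg)  # 50, i.e. right at the Ddeg >= 50 threshold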

maps.py (lines 2099-2109):

    # Limb-darkened?
    if (ydeg == 0) and (rv is False) and (reflected is False):

        # TODO: Add support for wavelength-dependent limb darkening
        if nw is not None:
            raise NotImplementedError(
                "Multi-wavelength limb-darkened maps are not yet supported."
            )

        Bases = (LimbDarkenedBase, MapBase)
    else:

core.py (lines 79-90):

        self._tensordotRz = tensordotRzOp(self._c_ops.tensordotRz)
        self._dotR = dotROp(self._c_ops.dotR)

        # Filter
        # TODO: Make the filter operator sparse
        self._F = FOp(self._c_ops.F, self._c_ops.N, self._c_ops.Ny)

        # Differential rotation
        self._tensordotD = tensordotDOp(self._c_ops.tensordotD)

        # Misc
        self._spotYlm = spotYlmOp(self._c_ops.spotYlm, self.ydeg, self.nw)

core.py (lines 102-112):

                self._minimize = minimizeOp(
                    self.intensity, self.P, self.ydeg, self.udeg, self.fdeg
                )
        else:
            # TODO: Implement minimization for spectral maps?
            self._minimize = None
        self._LimbDarkIsPhysical = LDPhysicalOp(_c_ops.nroots)

    @property
    def rT(self):
        return self._rT

core.py (lines 364-375):

        if self.ydeg == 0:
            return M

        # Rotate to the sky frame
        # TODO: Do this in a single compound rotation
        M = self.dotR(
            self.dotR(
                self.dotR(
                    M,
                    -tt.cos(obl),
                    -tt.sin(obl),
                    math.to_tensor(0.0),

core.py (lines 465-476):

                -theta,
            )

        # Rotate to the sky frame
        # TODO: Do this in a single compound rotation
        MT = self.dotR(
            self.dotR(
                self.dotR(
                    MT,
                    math.to_tensor(1.0),
                    math.to_tensor(0.0),
                    math.to_tensor(0.0),
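
The compound-rotation TODOs in the two core.py snippets above amount to composing three axis-angle rotations into a single equivalent rotation before applying it. A minimal sketch of that composition in plain NumPy/SciPy, with placeholder axes and angles (starry's dotR is a Theano op acting on spherical-harmonic coefficients, so this only illustrates the idea):

import numpy as np
from scipy.spatial.transform import Rotation as R

obl, inc, theta = 0.3, 1.2, 0.7                        # hypothetical angles
r1 = R.from_rotvec(theta * np.array([0.0, 0.0, 1.0]))  # placeholder axes
r2 = R.from_rotvec((0.5 * np.pi - inc) * np.array([1.0, 0.0, 0.0]))
r3 = R.from_rotvec(obl * np.array([0.0, 0.0, 1.0]))
compound = r3 * r2 * r1          # compose once (r1 is applied first)
rotvec = compound.as_rotvec()    # single equivalent axis * angle
angle = np.linalg.norm(rotvec)
axis = rotvec / angle
# A single dotR(M, *axis, angle) call could then replace the nested calls.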

core.py (lines 627-638):

        The method `render` requires a bunch of dummy params for
        compatibility with the `System` class. This method is a
        convenience method for use in the `Map` class.
        """
        # TODO: There may be a bug in Theano related to
        # tt.mgrid; I get different results depending on whether the
        # function is compiled using `theano.function()` or if it
        # is evaluated using `.eval()`. The small perturbation to `res`
        # is a temporary fix that ensures that `y` and `x` are of the
        # correct length in all cases I've tested.

        # Compute the Cartesian grid

core.py (lines 1453-1464):

        sec_veq,
        keplerian,
    ):
        """Compute the observed system radial velocity (RV maps only)."""
        # TODO: This method is currently very inefficient, as it
        # calls `X` twice per call and instantiates an `orbit`
        # instance up to three separate times per call. We should
        # re-code the logic from `X()` in here to optimize it.

        # Compute the RV filter
        pri_f = self.primary.map.ops.compute_rv_filter(
            pri_inc, pri_obl, pri_veq, pri_alpha

utils.py (lines 53-64):

        - an integer (`int`, `np.int`, `np.int16`, `np.int32`, `np.int64`)
        - a numpy boolean (`np.array(True)`, `np.array(False)`)
        - a numpy float array with ndim equal to 0, 1, 2, or 3

    # TODO: Cast lists to arrays and floats to np.array(float)

    """
    ttype = type(arg)
    if is_theano(arg):
        return ttype
    else:
        if ttype in integers:
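
A minimal sketch of the cast described by the TODO above (the helper name is illustrative, not starry's actual API):

import numpy as np

def as_numpy(arg):
    # Lists become numpy arrays; bare floats become 0-d float arrays.
    if isinstance(arg, list):
        return np.asarray(arg)
    if isinstance(arg, float):
        return np.array(arg)
    return arg

print(as_numpy([1.0, 2.0, 3.0]).shape)  # (3,)
print(as_numpy(2.5).ndim)               # 0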

polybasis.py (lines 65-74):

    def perform(self, node, inputs, outputs):
        x, y, z, bpT = inputs

        # TODO: When any of the coords are zero, there's a div
        # by zero below. This hack fixes the issue. We should
        # think of a better way of doing this!
        tol = 1e-8
        x[np.abs(x) < tol] = tol
        y[np.abs(y) < tol] = tol
        z[np.abs(z) < tol] = tol
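
One way to address the TODO above without the sign issue of the in-place hack (which maps small negative values to +tol) is a sign-preserving clamp. This is only an illustrative sketch in plain NumPy, not a tested fix:

import numpy as np

def clamp_away_from_zero(a, tol=1e-8):
    # Push entries with |a| < tol out to +/-tol, keeping their sign
    # (exact zeros go to +tol, matching the existing behaviour).
    sign = np.where(a >= 0, 1.0, -1.0)
    return np.where(np.abs(a) < tol, sign * tol, a)

x = np.array([0.0, 1e-12, -1e-12, 0.5])
print(clamp_away_from_zero(x))  # [ 1.e-08  1.e-08 -1.e-08  5.e-01]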

basis.h (lines 43-53):

/**
Compute the `P(z)` part of the Ylm vectors.

TODO: This can be sped up with sparse algebra.

*/
template <typename Scalar>
inline void legendre(int lmax,
                     std::vector<std::vector<Eigen::Triplet<Scalar>>> &M) {
  // Compute densely
  int N = (lmax + 1) * (lmax + 1);

filter.h (lines 247-257):

    Vector<Scalar> pf;
    pf = B.A1_f * f;

    // Multiply them
    // TODO: DpDpf and DpDpu are sparse, and we should probably exploit that
    Vector<Scalar> p;
    if (udeg > fdeg) {
      computePolynomialProduct(udeg, pu, fdeg, pf, DpDpu, DpDpf);
    } else {
      computePolynomialProduct(fdeg, pf, udeg, pu, DpDpf, DpDpu);
    }

limbdark.h (lines 2-11):

\file limbdark.h
\brief Limb darkening utilities from Agol, Luger & Foreman-Mackey (2019).

TODO: Loop downward in `v` until `J[v] != 0`
TODO: Test all special cases

*/

#ifndef _STARRY_LIMBDARK_H_
#define _STARRY_LIMBDARK_H_

limbdark.h (lines 2-13):

\file limbdark.h
\brief Limb darkening utilities from Agol, Luger & Foreman-Mackey (2019).

TODO: Loop downward in `v` until `J[v] != 0`
TODO: Test all special cases

*/

#ifndef _STARRY_LIMBDARK_H_
#define _STARRY_LIMBDARK_H_

#include <cmath>

misc.h (lines 96-107):

                    RowVector<Scalar> &bamp,
                    Scalar &bsigma, Scalar &blat, Scalar &blon) {

  // Forward diff for sigma
  // TODO: Compute the backprop expression
  using ADType = ADScalar<Scalar, 1>;
  ADType sigma = sigma_;
  sigma.derivatives() = Vector<Scalar>::Unit(1, 0);

  // Compute the integrals recursively
  Vector<ADType> IP(l + 1);
  Vector<ADType> ID(l + 1);

solver_reflected.h (lines 176-186):

  */
  inline Scalar compute(const Scalar &bterm, const RowVector<Scalar> &brT) {
    Scalar bb = 0.0;
    computeHI_with_grad(bterm);
    // TODO: The gradient is infinite when bterm = +/- 1
    // Not sure how best to handle this.
    Scalar fac = sqrt(max(Scalar(1.0) - bterm * bterm, tol));
    Scalar DfacDb = -bterm / fac;
    int n = 0;
    int i, j;
    int mu, nu;

wigner.h (lines 393-404):

          tol_ad, D_ad, R_ad);

    // Extract the matrices and their derivatives
    for (int l = 0; l < ydeg + 1; ++l) {
      // TODO: This data copy is *very* slow; is there a better way?
      for (int i = 0; i < 2 * l + 1; ++i) {
        for (int j = 0; j < 2 * l + 1; ++j) {
          R[l](i, j) = R_ad[l](i, j).value();
          DRDx[l](i, j) = R_ad[l](i, j).derivatives()(0);
          DRDy[l](i, j) = R_ad[l](i, j).derivatives()(1);
          DRDz[l](i, j) = R_ad[l](i, j).derivatives()(2);
          DRDtheta[l](i, j) = R_ad[l](i, j).derivatives()(3);

wigner.h (lines 505-516):

    dotR_bM.setZero(npts, Ny);
    if (unlikely(npts == 0)) return;

    // Dot them in
    // TODO: There must be a more efficient way of doing this.
    for (int l = 0; l < ydeg + 1; ++l) {
      // d / dargs
      dotR_bx += (M.block(0, l * l, npts, 2 * l + 1) * DRDx[l])
                     .cwiseProduct(bMR.block(0, l * l, npts, 2 * l + 1))
                     .sum();
      dotR_by += (M.block(0, l * l, npts, 2 * l + 1) * DRDy[l])
                     .cwiseProduct(bMR.block(0, l * l, npts, 2 * l + 1))

BandTriangularSolver.h (lines 13-24):

namespace internal {

 /* \internal
  * Solve Ax=b with A a band triangular matrix
  * TODO: extend it to matrices for x abd b */
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, int StorageOrder>
struct band_solve_triangular_selector;


template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar>
struct band_solve_triangular_selector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,RowMajor>
{

PackedSelfadjointProduct.h (lines 30-40):

    for (Index i=0; i<size; ++i)
    {
      Map<Matrix<Scalar,Dynamic,1> >(mat, UpLo==Lower ? size-i : (i+1)) += alpha * cj(vec[i]) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1)));
      //FIXME This should be handled outside.
      mat[UpLo==Lower ? 0 : i] = numext::real(mat[UpLo==Lower ? 0 : i]);
      mat += UpLo==Lower ? size-i : (i+1);
    }
  }
};

template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>

Rank2Update.h (lines 44-55):

    {
      Map<Matrix<Scalar,Dynamic,1> >(mat+offset, UpLo==Lower ? size-i : (i+1)) +=
      numext::conj(alpha) * numext::conj(u[i]) * OtherMap(v+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1))
                + alpha * numext::conj(v[i]) * OtherMap(u+(UpLo==Lower ? i : 0), UpLo==Lower ? size-i : (i+1));
      //FIXME This should be handled outside.
      mat[offset+(UpLo==Lower ? 0 : i)] = numext::real(mat[offset+(UpLo==Lower ? 0 : i)]);
      offset += UpLo==Lower ? size-i : (i+1);
    }
  }
};

} // end namespace internal

level1_cplx_impl.h (lines 108-117):

  Reverse<StridedVectorType> rvx(vx);
  Reverse<StridedVectorType> rvy(vy);

  // TODO implement mixed real-scalar rotations
       if(*incx<0 && *incy>0) internal::apply_rotation_in_the_plane(rvx, vy, JacobiRotation<Scalar>(c,s));
  else if(*incx>0 && *incy<0) internal::apply_rotation_in_the_plane(vx, rvy, JacobiRotation<Scalar>(c,s));
  else                        internal::apply_rotation_in_the_plane(vx, vy,  JacobiRotation<Scalar>(c,s));

  return 0;
}

level1_real_impl.h (lines 41-51):

  else return 0;
}

// computes the Euclidean norm of a vector.
// FIXME
Scalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx)
{
//   std::cerr << "_nrm2 " << *n << " " << *incx << "\n";
  if(*n<=0) return 0;

  Scalar* x = reinterpret_cast<Scalar*>(px);

level1_real_impl.h (lines 84-95):

{
  Scalar* x = reinterpret_cast<Scalar*>(px);
  Scalar* y = reinterpret_cast<Scalar*>(py);

  // TODO

  return 0;
}

// computes the modified parameters for a Givens rotation.
int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealScalar *x2, RealScalar *param)
{

level1_real_impl.h (lines 92-99):

// computes the modified parameters for a Givens rotation.
int EIGEN_BLAS_FUNC(rotmg)(RealScalar *d1, RealScalar *d2, RealScalar *x1, RealScalar *x2, RealScalar *param)
{
  // TODO

  return 0;
}
*/

level3_impl.h (lines 264-274):

  if(*m==0 || *n==0)
    return 1;

  // FIXME find a way to avoid this copy
  Matrix<Scalar,Dynamic,Dynamic,ColMajor> tmp = matrix(b,*m,*n,*ldb);
  matrix(b,*m,*n,*ldb).setZero();

  if(SIDE(*side)==LEFT)
  {
    internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m,1,false);
    func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha, blocking);

level3_impl.h (lines 317-328):

  }

  int size = (SIDE(*side)==LEFT) ? (*m) : (*n);
  #if ISCOMPLEX
  // FIXME add support for symmetric complex matrix
  Matrix<Scalar,Dynamic,Dynamic,ColMajor> matA(size,size);
  if(UPLO(*uplo)==UP)
  {
    matA.triangularView<Upper>() = matrix(a,size,size,*lda);
    matA.triangularView<Lower>() = matrix(a,size,size,*lda).transpose();
  }
  else if(UPLO(*uplo)==LO)

level3_impl.h (lines 406-417):

  if(*n==0 || *k==0)
    return 0;

  #if ISCOMPLEX
  // FIXME add support for symmetric complex matrix
  if(UPLO(*uplo)==UP)
  {
    if(OP(*op)==NOTR)
      matrix(c, *n, *n, *ldc).triangularView<Upper>() += alpha * matrix(a,*n,*k,*lda) * matrix(a,*n,*k,*lda).transpose();
    else
      matrix(c, *n, *n, *ldc).triangularView<Upper>() += alpha * matrix(a,*k,*n,*lda).transpose() * matrix(a,*k,*n,*lda);
  }

eigenvalues.cpp (lines 12-22):

// computes eigen values and vectors of a general N-by-N matrix A
EIGEN_LAPACK_FUNC(syev,(char *jobz, char *uplo, int* n, Scalar* a, int *lda, Scalar* w, Scalar* /*work*/, int* lwork, int *info))
{
  // TODO exploit the work buffer
  bool query_size = *lwork==-1;

  *info = 0;
        if(*jobz!='N' && *jobz!='V')                    *info = -1;
  else  if(UPLO(*uplo)==INVALID)                        *info = -2;
  else  if(*n<0)                                        *info = -3;
  else  if(*lda<std::max(1,*n))                         *info = -5;

svd.cpp (lines 13-24):

// computes the singular values/vectors a general M-by-N matrix A using divide-and-conquer
EIGEN_LAPACK_FUNC(gesdd,(char *jobz, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork,
                         EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int * /*iwork*/, int *info))
{
  // TODO exploit the work buffer
  bool query_size = *lwork==-1;
  int diag_size = (std::min)(*m,*n);

  *info = 0;
        if(*jobz!='A' && *jobz!='S' && *jobz!='O' && *jobz!='N')  *info = -1;
  else  if(*m<0)                                                  *info = -2;
  else  if(*n<0)                                                  *info = -3;

svd.cpp (lines 84-95):

// computes the singular values/vectors a general M-by-N matrix A using two sided jacobi algorithm
EIGEN_LAPACK_FUNC(gesvd,(char *jobu, char *jobv, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork,
                         EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int *info))
{
  // TODO exploit the work buffer
  bool query_size = *lwork==-1;
  int diag_size = (std::min)(*m,*n);

  *info = 0;
        if( *jobu!='A' && *jobu!='S' && *jobu!='O' && *jobu!='N') *info = -1;
  else  if((*jobv!='A' && *jobv!='S' && *jobv!='O' && *jobv!='N')
           || (*jobu=='O' && *jobv=='O'))                         *info = -2;

array.cpp (lines 198-209):

  VERIFY( (m1<-a || m1>a).count() == (m1.abs()>a).count());

  typedef Array<typename ArrayType::Index, Dynamic, 1> ArrayOfIndices;

  // TODO allows colwise/rowwise for array
  VERIFY_IS_APPROX(((m1.abs()+1)>RealScalar(0.1)).colwise().count(), ArrayOfIndices::Constant(cols,rows).transpose());
  VERIFY_IS_APPROX(((m1.abs()+1)>RealScalar(0.1)).rowwise().count(), ArrayOfIndices::Constant(rows, cols));
}

template<typename ArrayType> void array_real(const ArrayType& m)
{
  using std::abs;

array_for_matrix.cpp (lines 141-152):

  VERIFY( ((m1.array()<-a).matrix() || (m1.array()>a).matrix()).count() == (m1.cwiseAbs().array()>a).count());

  typedef Matrix<typename MatrixType::Index, Dynamic, 1> VectorOfIndices;

  // TODO allows colwise/rowwise for array
  VERIFY_IS_APPROX(((m1.array().abs()+1)>RealScalar(0.1)).matrix().colwise().count(), VectorOfIndices::Constant(cols,rows).transpose());
  VERIFY_IS_APPROX(((m1.array().abs()+1)>RealScalar(0.1)).matrix().rowwise().count(), VectorOfIndices::Constant(rows, cols));
}

template<typename VectorType> void lpNorm(const VectorType& v)
{
  using std::sqrt;

ctorleak.cpp (lines 10-20):

  Foo()
  {
#ifdef EIGEN_EXCEPTIONS
    // TODO: Is this the correct way to handle this?
    if (Foo::object_count > Foo::object_limit) { std::cout << "\nThrow!\n"; throw Foo::Fail(); }
#endif
      std::cout << '+';
    ++Foo::object_count;
  }

  ~Foo()

denseLM.cpp (lines 106-116):

  info = lm.minimize(uv);

  VERIFY_IS_EQUAL(info, 1);
  //FIXME Check other parameters
  return info;
}

template<typename FunctorType, typename VectorType>
int test_lmder(FunctorType& functor, VectorType& uv)
{
  typedef typename VectorType::Scalar Scalar;

denseLM.cpp (lines 119-130):

  LevenbergMarquardt<FunctorType> lm(functor);
  info = lm.lmder1(uv);

  VERIFY_IS_EQUAL(info, 1);
  //FIXME Check other parameters
  return info;
}

template<typename FunctorType, typename VectorType>
int test_minimizeSteps(FunctorType& functor, VectorType& uv)
{
  LevenbergMarquardtSpace::Status info;

denseLM.cpp (lines 137-148):

    info = lm.minimizeOneStep(uv);
  } while (info==LevenbergMarquardtSpace::Running);

  VERIFY_IS_EQUAL(info, 1);
  //FIXME Check other parameters
  return info;
}

template<typename T>
void test_denseLM_T()
{
  typedef Matrix<T,Dynamic,1> VectorType;

dynalloc.cpp (lines 82-93):

};

template<typename T> void check_dynaligned()
{
  // TODO have to be updated once we support multiple alignment values
  if(T::SizeAtCompileTime % ALIGNMENT == 0)
  {
    T* obj = new T;
    VERIFY(T::NeedsToAlign==1);
    VERIFY(internal::UIntPtr(obj)%ALIGNMENT==0);
    delete obj;
  }

eigensolver_generalized_real.cpp (lines 52-62):

  // non symmetric case:
  {
    GeneralizedEigenSolver<MatrixType> eig(rows);
    // TODO enable full-prealocation of required memory, this probably requires an in-place mode for HessenbergDecomposition
    //Eigen::internal::set_is_malloc_allowed(false);
    eig.compute(a,b);
    //Eigen::internal::set_is_malloc_allowed(true);
    for(Index k=0; k<cols; ++k)
    {
      Matrix<ComplexScalar,Dynamic,Dynamic> tmp = (eig.betas()(k)*a).template cast<ComplexScalar>() - eig.alphas()(k)*b;
      if(tmp.size()>1 && tmp.norm()>(std::numeric_limits<Scalar>::min)())

eigensolver_selfadjoint.cpp (lines 149-159):

  VERIFY_IS_APPROX(tridiag.diagonal(), tridiag.matrixT().diagonal());
  VERIFY_IS_APPROX(tridiag.subDiagonal(), tridiag.matrixT().template diagonal<-1>());
  Matrix<RealScalar,Dynamic,Dynamic> T = tridiag.matrixT();
  if(rows>1 && cols>1) {
    // FIXME check that upper and lower part are 0:
    //VERIFY(T.topRightCorner(rows-2, cols-2).template triangularView<Upper>().isZero());
  }
  VERIFY_IS_APPROX(tridiag.diagonal(), T.diagonal());
  VERIFY_IS_APPROX(tridiag.subDiagonal(), T.template diagonal<1>());
  VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView<Lower>()), tridiag.matrixQ() * tridiag.matrixT().eval() * MatrixType(tridiag.matrixQ()).adjoint());
  VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView<Lower>()), tridiag.matrixQ() * tridiag.matrixT() * tridiag.matrixQ().adjoint());

evaluators.cpp (lines 260-271):

    // test direct traversal
    Matrix3f m3;
    Array33f a3;
    VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity());  // matrix, nullary
    // TODO: find a way to test direct traversal with array
    VERIFY_IS_APPROX_EVALUATOR(m3.transpose(), Matrix3f::Identity().transpose());  // transpose
    VERIFY_IS_APPROX_EVALUATOR(m3, 2 * Matrix3f::Identity());  // unary
    VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity() + Matrix3f::Zero());  // binary
    VERIFY_IS_APPROX_EVALUATOR(m3.block(0,0,2,2), Matrix3f::Identity().block(1,1,2,2));  // block

    // test linear traversal
    VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Zero());  // matrix, nullary

evaluators.cpp (lines 279-290):

    Array44f a4, a4src = Matrix4f::Random();
    VERIFY_IS_APPROX_EVALUATOR(m4, m4src);  // matrix
    VERIFY_IS_APPROX_EVALUATOR(a4, a4src);  // array
    VERIFY_IS_APPROX_EVALUATOR(m4.transpose(), m4src.transpose());  // transpose
    // TODO: find out why Matrix4f::Zero() does not allow inner vectorization
    VERIFY_IS_APPROX_EVALUATOR(m4, 2 * m4src);  // unary
    VERIFY_IS_APPROX_EVALUATOR(m4, m4src + m4src);  // binary

    // test linear vectorization
    MatrixXf mX(6,6), mXsrc = MatrixXf::Random(6,6);
    ArrayXXf aX(6,6), aXsrc = ArrayXXf::Random(6,6);
    VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc);  // matrix

geo_quaternion.cpp (lines 239-250):

  const Quaternionx& cq3(q3);
  VERIFY( &cq3.x() == &q3.x() );
  const MQuaternionUA& cmq3(mq3);
  VERIFY( &cmq3.x() == &mq3.x() );
  // FIXME the following should be ok. The problem is that currently the LValueBit flag
  // is used to determine wether we can return a coeff by reference or not, which is not enough for Map<const ...>.
  //const MCQuaternionUA& cmcq3(mcq3);
  //VERIFY( &cmcq3.x() == &mcq3.x() );
}

template<typename Scalar> void quaternionAlignment(void){
  typedef Quaternion<Scalar,AutoAlign> QuaternionA;

geo_transformations.cpp (lines 165-176):

  VERIFY_IS_APPROX(AngleAxisx(m).toRotationMatrix(),
    Quaternionx(m).toRotationMatrix());

  // Transform
  // TODO complete the tests !
  a = 0;
  while (abs(a)<Scalar(0.1))
    a = internal::random<Scalar>(-Scalar(0.4)*Scalar(EIGEN_PI), Scalar(0.4)*Scalar(EIGEN_PI));
  q1 = AngleAxisx(a, v0.normalized());
  Transform3 t0, t1, t2;

  // first test setIdentity() and Identity()

hessenberg.cpp (lines 45-56):

  VERIFY_RAISES_ASSERT( hessUninitialized.matrixQ() );
  VERIFY_RAISES_ASSERT( hessUninitialized.householderCoefficients() );
  VERIFY_RAISES_ASSERT( hessUninitialized.packedMatrix() );

  // TODO: Add tests for packedMatrix() and householderCoefficients()
}

void test_hessenberg()
{
  CALL_SUBTEST_1(( hessenberg<std::complex<double>,1>() ));
  CALL_SUBTEST_2(( hessenberg<std::complex<double>,2>() ));
  CALL_SUBTEST_3(( hessenberg<std::complex<float>,4>() ));

nomalloc.cpp (lines 148-159):

  Eigen::ColPivHouseholderQR<Matrix>  cpQR; cpQR.compute(A);
  X = cpQR.solve(B);
  x = cpQR.solve(b);
  Eigen::FullPivHouseholderQR<Matrix> fpQR; fpQR.compute(A);
  // FIXME X = fpQR.solve(B);
  x = fpQR.solve(b);

  // SVD module
  Eigen::JacobiSVD<Matrix> jSVD; jSVD.compute(A, ComputeFullU | ComputeFullV);
}

void test_zerosized() {

prec_inverse_4x4.cpp (lines 48-59):

  std::cerr << "inverse_general_4x4, Scalar = " << type_name<Scalar>() << std::endl;
  double error_avg = error_sum / repeat;
  EIGEN_DEBUG_VAR(error_avg);
  EIGEN_DEBUG_VAR(error_max);
   // FIXME that 1.25 used to be a 1.0 until the NumTraits changes on 28 April 2010, what's going wrong??
   // FIXME that 1.25 used to be 1.2 until we tested gcc 4.1 on 30 June 2010 and got 1.21.
  VERIFY(error_avg < (NumTraits<Scalar>::IsComplex ? 8.0 : 1.25));
  VERIFY(error_max < (NumTraits<Scalar>::IsComplex ? 64.0 : 20.0));

  {
    int s = 5;//internal::random<int>(4,10);
    int i = 0;//internal::random<int>(0,s-4);

prec_inverse_4x4.cpp (lines 49-60):

  double error_avg = error_sum / repeat;
  EIGEN_DEBUG_VAR(error_avg);
  EIGEN_DEBUG_VAR(error_max);
   // FIXME that 1.25 used to be a 1.0 until the NumTraits changes on 28 April 2010, what's going wrong??
   // FIXME that 1.25 used to be 1.2 until we tested gcc 4.1 on 30 June 2010 and got 1.21.
  VERIFY(error_avg < (NumTraits<Scalar>::IsComplex ? 8.0 : 1.25));
  VERIFY(error_max < (NumTraits<Scalar>::IsComplex ? 64.0 : 20.0));

  {
    int s = 5;//internal::random<int>(4,10);
    int i = 0;//internal::random<int>(0,s-4);
    int j = 0;//internal::random<int>(0,s-4);

product_large.cpp (lines 59-70):

  }

  {
    // check the functions to setup blocking sizes compile and do not segfault
    // FIXME check they do what they are supposed to do !!
    std::ptrdiff_t l1 = internal::random<int>(10000,20000);
    std::ptrdiff_t l2 = internal::random<int>(100000,200000);
    std::ptrdiff_t l3 = internal::random<int>(1000000,2000000);
    setCpuCacheSizes(l1,l2,l3);
    VERIFY(l1==l1CacheSize());
    VERIFY(l2==l2CacheSize());
    std::ptrdiff_t k1 = internal::random<int>(10,100)*16;

product_trmm.cpp (lines 76-87):

    VERIFY_IS_APPROX( ge_xs = (s1*mat).adjoint().template triangularView<Mode>() * ge_left.adjoint(), numext::conj(s1) * triTr.conjugate() * ge_left.adjoint());
  VERIFY_IS_APPROX( ge_xs = (s1*mat).transpose().template triangularView<Mode>() * ge_left.adjoint(), s1triTr * ge_left.adjoint());


  // TODO check with sub-matrix expressions ?
}

template<typename Scalar, int Mode, int TriOrder>
void trmv(int rows=get_random_size<Scalar>(), int cols=get_random_size<Scalar>())
{
  trmm<Scalar,Mode,TriOrder,ColMajor,ColMajor,1>(rows,cols,1);
}

product_trmv.cpp (lines 67-78):

  VERIFY((v1.transpose() * m3).isApprox(v1.transpose() * m1.template triangularView<Eigen::Lower>(), largerEps));
  VERIFY((v1.adjoint() * m3).isApprox(v1.adjoint() * m1.template triangularView<Eigen::Lower>(), largerEps));
  VERIFY((v1.adjoint() * m3.adjoint()).isApprox(v1.adjoint() * m1.template triangularView<Eigen::Lower>().adjoint(), largerEps));

  // TODO check with sub-matrices
}

void test_product_trmv()
{
  int s = 0;
  for(int i = 0; i < g_repeat ; i++) {
    CALL_SUBTEST_1( trmv(Matrix<float, 1, 1>()) );

qr.cpp (lines 37-48):

  Matrix<Scalar,Rows,Cols> m1 = Matrix<Scalar,Rows,Cols>::Random();
  HouseholderQR<Matrix<Scalar,Rows,Cols> > qr(m1);

  Matrix<Scalar,Rows,Cols> r = qr.matrixQR();
  // FIXME need better way to construct trapezoid
  for(int i = 0; i < Rows; i++) for(int j = 0; j < Cols; j++) if(i>j) r(i,j) = Scalar(0);

  VERIFY_IS_APPROX(m1, qr.householderQ() * r);

  Matrix<Scalar,Cols,Cols2> m2 = Matrix<Scalar,Cols,Cols2>::Random(Cols,Cols2);
  Matrix<Scalar,Rows,Cols2> m3 = m1*m2;
  m2 = Matrix<Scalar,Cols,Cols2>::Random(Cols,Cols2);

qr_fullpivoting.cpp (lines 37-47):

  MatrixQType q = qr.matrixQ();
  VERIFY_IS_UNITARY(q);

  // FIXME need better way to construct trapezoid
  for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) if(i>j) r(i,j) = Scalar(0);

  MatrixType c = qr.matrixQ() * r * qr.colsPermutation().inverse();

  VERIFY_IS_APPROX(m1, c);

  // stress the ReturnByValue mechanism

qr_fullpivoting.cpp (lines 128-138):

void test_qr_fullpivoting()
{
 for(int i = 0; i < 1; i++) {
    // FIXME : very weird bug here
//     CALL_SUBTEST(qr(Matrix2f()) );
    CALL_SUBTEST_1( qr<MatrixXf>() );
    CALL_SUBTEST_2( qr<MatrixXd>() );
    CALL_SUBTEST_3( qr<MatrixXcd>() );
  }

  for(int i = 0; i < g_repeat; i++) {

real_qz.cpp (lines 42-53):

    break;
  }

  RealQZ<MatrixType> qz(dim);
  // TODO enable full-prealocation of required memory, this probably requires an in-place mode for HessenbergDecomposition
  //Eigen::internal::set_is_malloc_allowed(false);
  qz.compute(A,B);
  //Eigen::internal::set_is_malloc_allowed(true);

  VERIFY_IS_EQUAL(qz.info(), Success);
  // check for zeros
  bool all_zeros = true;

sparse.h (lines 76-87):

        std::swap(ai,aj);
      Scalar v = (internal::random<double>(0,1) < density) ? internal::random<Scalar>() : Scalar(0);
      if ((flags&ForceNonZeroDiag) && (i==j))
      {
        // FIXME: the following is too conservative
        v = internal::random<Scalar>()*Scalar(3.);
        v = v*v;
        if(numext::real(v)>0) v += Scalar(5);
        else                  v -= Scalar(5);
      }
      if ((flags & MakeLowerTriangular) && aj>ai)
        v = Scalar(0);

sparse_product.cpp (lines 286-297):

    mLo = mUp.adjoint();
    refS = refUp + refLo;
    refS.diagonal() *= 0.5;
    mS = mUp + mLo;
    // TODO be able to address the diagonal....
    for (int k=0; k<mS.outerSize(); ++k)
      for (typename SparseMatrixType::InnerIterator it(mS,k); it; ++it)
        if (it.index() == k)
          it.valueRef() *= Scalar(0.5);

    VERIFY_IS_APPROX(refS.adjoint(), refS);
    VERIFY_IS_APPROX(mS.adjoint(), mS);

tensor_benchmarks.h (lines 12-22):

using Eigen::Tensor;
using Eigen::TensorMap;

// TODO(bsteiner): also templatize on the input type since we have users
// for int8 as well as floats.
template <typename Device, typename T> class BenchmarkSuite {
 public:
  BenchmarkSuite(const Device& device, size_t m, size_t k, size_t n)
      : m_(m), k_(k), n_(n), device_(device) {
    initialize();
  }

camera.cpp (lines 100-111):

}

void Camera::setDirection(const Vector3f& newDirection)
{
    // TODO implement it computing the rotation between newDirection and current dir ?
    Vector3f up = this->up();

    Matrix3f camAxes;

    camAxes.col(2) = (-newDirection).normalized();
    camAxes.col(0) = up.cross( camAxes.col(2) ).normalized();
    camAxes.col(1) = camAxes.col(2).cross( camAxes.col(0) ).normalized();

camera.cpp (lines 257-264):

    Vector3f a(2.*uv.x()/float(mVpWidth)-1., 2.*uv.y()/float(mVpHeight)-1., 1.);
    a.x() *= depth/mProjectionMatrix(0,0);
    a.y() *= depth/mProjectionMatrix(1,1);
    a.z() = -depth;
    // FIXME /\/|
    Vector4f b = invModelview * Vector4f(a.x(), a.y(), a.z(), 1.);
    return Vector3f(b.x(), b.y(), b.z());
}

MatrixBase_triangularView.cpp (lines 5-9):

cout << "Here is the strictly-upper-triangular matrix extracted from m:" << endl
     << Matrix3i(m.triangularView<Eigen::StrictlyUpper>()) << endl;
cout << "Here is the unit-lower-triangular matrix extracted from m:" << endl
     << Matrix3i(m.triangularView<Eigen::UnitLower>()) << endl;
// FIXME need to implement output for triangularViews (Bug 885)

AlignedVector3 (lines 33-44):

  * the same result can be achieved by directly using a 4D vector.
  * This class makes this process simpler.
  *
  */
// TODO specialize Cwise
template<typename _Scalar> class AlignedVector3;

namespace internal {
template<typename _Scalar> struct traits<AlignedVector3<_Scalar> >
  : traits<Matrix<_Scalar,3,1,0,4,1> >
{
};

FFT (lines 76-87):

     //template <typename T> typedef struct internal::fftw_impl  default_fft_impl; this does not work
     template <typename T> struct default_fft_impl : public internal::fftw_impl<T> {};
   }
#elif defined EIGEN_MKL_DEFAULT
// TODO
// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form
#  include "src/FFT/ei_imklfft_impl.h"
   namespace Eigen {
     template <typename T> struct default_fft_impl : public internal::imklfft_impl {};
   }
#else
// internal::kissfft_impl:  small, free, reasonably efficient default, derived from kissfft

FFT (lines 355-366):

    }


    /*
    // TODO: multi-dimensional FFTs
    inline
    void inv2(Complex * dst, const Complex * src, int n0,int n1)
    {
      m_impl.inv2(dst,src,n0,n1);
      if ( HasFlag( Unscaled ) == false)
          scale(dst,1./(n0*n1),n0*n1);
    }

NonLinearOptimization.cpp (lines 235-244):

  MatrixXd cov;
  cov =  covfac*lm.fjac.topLeftCorner<n,n>();
  VERIFY_IS_APPROX( cov, cov_ref);
  // TODO: why isn't this allowed ? :
  // VERIFY_IS_APPROX( covfac*fjac.topLeftCorner<n,n>() , cov_ref);
}

struct hybrj_functor : Functor<double>
{
    hybrj_functor(void) : Functor<double>(9,9) {}

NonLinearOptimization.cpp (lines 626-636):

  MatrixXd cov;
  cov =  covfac*lm.fjac.topLeftCorner<n,n>();
  VERIFY_IS_APPROX( cov, cov_ref);
  // TODO: why isn't this allowed ? :
  // VERIFY_IS_APPROX( covfac*fjac.topLeftCorner<n,n>() , cov_ref);
}

struct chwirut2_functor : Functor<double>
{
    chwirut2_functor(void) : Functor<double>(3,54) {}
    static const double m_x[54];

autodiff.cpp (lines 210-221):

    VERIFY_IS_APPROX(y, yref);
    VERIFY_IS_APPROX(j, jref);
}

// TODO also check actual derivatives!
template <int>
void test_autodiff_scalar()
{
  Vector2f p = Vector2f::Random();
  typedef AutoDiffScalar<Vector2f> AD;
  AD ax(p.x(),Vector2f::UnitX());
  AD ay(p.y(),Vector2f::UnitY());

autodiff.cpp (lines 223-234):

  VERIFY_IS_APPROX(res.value(), foo(p.x(),p.y()));
}


// TODO also check actual derivatives!
template <int>
void test_autodiff_vector()
{
  Vector2f p = Vector2f::Random();
  typedef AutoDiffScalar<Vector2f> AD;
  typedef Matrix<AD,2,1> VectorAD;
  VectorAD ap = p.cast<AD>();

autodiff_scalar.cpp (lines 11-21):

#include <unsupported/Eigen/AutoDiff>

/*
 * In this file scalar derivations are tested for correctness.
 * TODO add more tests!
 */

template<typename Scalar> void check_atan2()
{
  typedef Matrix<Scalar, 1, 1> Deriv1;
  typedef AutoDiffScalar<Deriv1> AD;

cxx11_tensor_concatenation.cpp (lines 99-110):

  }
}


// TODO(phli): Add test once we have a real vectorized implementation.
// static void test_vectorized_concatenation() {}

static void test_concatenation_as_lvalue()
{
  Tensor<int, 2> t1(2, 3);
  Tensor<int, 2> t2(2, 3);
  t1.setRandom();

cxx11_tensor_random.cpp (lines 15-26):

{
  Tensor<float, 1> vec(6);
  vec.setRandom();

  // Fixme: we should check that the generated numbers follow a uniform
  // distribution instead.
  for (int i = 1; i < 6; ++i) {
    VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1));
  }
}

static void test_normal()

cxx11_tensor_random.cpp (lines 27-36):

{
  Tensor<float, 1> vec(6);
  vec.setRandom<Eigen::internal::NormalRandomGenerator<float>>();

  // Fixme: we should check that the generated numbers follow a gaussian
  // distribution instead.
  for (int i = 1; i < 6; ++i) {
    VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1));
  }
}

levenberg_marquardt.cpp (lines 8-18):

// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


// FIXME: These tests all check for hard-coded values. Ideally, parameters and start estimates should be randomized.


#include <stdio.h>

#include "main.h"
#include <unsupported/Eigen/LevenbergMarquardt>

levenberg_marquardt.cpp (lines 133-143):

  MatrixXd cov;
  cov =  covfac*lm.matrixR().topLeftCorner<n,n>();
  VERIFY_IS_APPROX( cov, cov_ref);
  // TODO: why isn't this allowed ? :
  // VERIFY_IS_APPROX( covfac*fjac.topLeftCorner<n,n>() , cov_ref);
}

struct lmdif_functor : DenseFunctor<double>
{
    lmdif_functor(void) : DenseFunctor<double>(3,15) {}
    int operator()(const VectorXd &x, VectorXd &fvec) const

levenberg_marquardt.cpp (lines 236-246):

  MatrixXd cov;
  cov =  covfac*lm.matrixR().topLeftCorner<n,n>();
  VERIFY_IS_APPROX( cov, cov_ref);
  // TODO: why isn't this allowed ? :
  // VERIFY_IS_APPROX( covfac*fjac.topLeftCorner<n,n>() , cov_ref);
}

struct chwirut2_functor : DenseFunctor<double>
{
    chwirut2_functor(void) : DenseFunctor<double>(3,54) {}
    static const double m_x[54];

LDLT.h (lines 224-234):

    LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);

    /** \returns the internal LDLT decomposition matrix
      *
      * TODO: document the storage layout
      */
    inline const MatrixType& matrixLDLT() const
    {
      eigen_assert(m_isInitialized && "LDLT is not initialized.");
      return m_matrix;
    }

LDLT.h (lines 499-510):

  m_matrix = a.derived();

  // Compute matrix L1 norm = max abs column sum.
  m_l1_norm = RealScalar(0);
  // TODO move this code to SelfAdjointView
  for (Index col = 0; col < size; ++col) {
    RealScalar abs_col_sum;
    if (_UpLo == Lower)
      abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
    else
      abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
    if (abs_col_sum > m_l1_norm)

LLT.h (lines 166-176):

    }

    /** \returns the LLT decomposition matrix
      *
      * TODO: document the storage layout
      */
    inline const MatrixType& matrixLLT() const
    {
      eigen_assert(m_isInitialized && "LLT is not initialized.");
      return m_matrix;
    }

LLT.h (lines 433-444):

    m_matrix = a.derived();

  // Compute matrix L1 norm = max abs column sum.
  m_l1_norm = RealScalar(0);
  // TODO move this code to SelfAdjointView
  for (Index col = 0; col < size; ++col) {
    RealScalar abs_col_sum;
    if (_UpLo == Lower)
      abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
    else
      abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
    if (abs_col_sum > m_l1_norm)

CholmodSupport.h (lines 298-309):

      {
        this->m_info = NumericalIssue;
        return;
      }
      // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
      dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols());
      cholmod_free_dense(&x_cd, &m_cholmod);
    }

    /** \internal */
    template<typename RhsDerived, typename DestDerived>
    void _solve_impl(const SparseMatrixBase<RhsDerived> &b, SparseMatrixBase<DestDerived> &dest) const

CholmodSupport.h (lines 321-332):

      {
        this->m_info = NumericalIssue;
        return;
      }
      // TODO optimize this copy by swapping when possible (be careful with alignment, etc.)
      dest.derived() = viewAsEigen<typename DestDerived::Scalar,ColMajor,typename DestDerived::StorageIndex>(*x_cs);
      cholmod_free_sparse(&x_cs, &m_cholmod);
    }
    #endif // EIGEN_PARSED_BY_DOXYGEN


    /** Sets the shift parameter that will be used to adjust the diagonal coefficients during the numerical factorization.

Array.h (lines 135-146):

      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
    }

#ifndef EIGEN_PARSED_BY_DOXYGEN
    // FIXME is it still needed ??
    /** \internal */
    EIGEN_DEVICE_FUNC
    Array(internal::constructor_without_unaligned_array_assert)
      : Base(internal::constructor_without_unaligned_array_assert())
    {
      Base::_check_template_params();
      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED

AssignEvaluator.h (lines 54-65):

    OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
    MaxSizeAtCompileTime = Dst::SizeAtCompileTime
  };

  // TODO distinguish between linear traversal and inner-traversals
  typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
  typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;

  enum {
    LinearPacketSize = unpacket_traits<LinearPacketType>::size,
    InnerPacketSize = unpacket_traits<InnerPacketType>::size
  };

AssignEvaluator.h (lines 191-201):

template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime
  };

AssignEvaluator.h (lines 256-266):

template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;
  typedef typename Kernel::PacketType PacketType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime,

AssignEvaluator.h (lines 372-383):

struct unaligned_dense_assignment_loop<false>
{
  // MSVC must not inline this functions. If it does, it fails to optimize the
  // packet access path.
  // FIXME check which version exhibits this issue
#if EIGEN_COMP_MSVC
  template <typename Kernel>
  static EIGEN_DONT_INLINE void run(Kernel &kernel,
                                    Index start,
                                    Index end)
#else
  template <typename Kernel>

AssignEvaluator.h (lines 692-702):

protected:
  DstEvaluatorType& m_dst;
  const SrcEvaluatorType& m_src;
  const Functor &m_functor;
  // TODO find a way to avoid the needs of the original expression
  DstXprType& m_dstExpr;
};

/***************************************************************************
* Part 5 : Entry point for dense rectangular assignment
***************************************************************************/

AssignEvaluator.h (lines 827-838):

  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
  ActualDstType actualDst(dst);

  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);

  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
template<typename Dst, typename Src>

AssignEvaluator.h (lines 845-856):

template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);

  Assignment<Dst,Src,Func>::run(dst, src, func);
}
template<typename Dst, typename Src>

AssignEvaluator.h (lines 880-891):

  }
};

// Generic assignment through evalTo.
// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
// Note that the last template argument "Weak" is needed to make it possible to perform
// both partial specialization+SFINAE without ambiguous specialization
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
{
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)

Block.h (lines 46-57):

    OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
                             ? int(outer_stride_at_compile_time<XprType>::ret)
                             : int(inner_stride_at_compile_time<XprType>::ret),

    // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
    FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
    Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
    // FIXME DirectAccessBit should not be handled by expressions
    //
    // Alignment is needed by MapBase's assertions
    // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator

Block.h (lines 50-60):

    // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
    FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
    Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
    // FIXME DirectAccessBit should not be handled by expressions
    //
    // Alignment is needed by MapBase's assertions
    // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
    Alignment = 0
  };
};

Block.h (lines 414-425):

      return m_startCol.value();
    }

  #ifndef __SUNPRO_CC
  // FIXME sunstudio is not friendly with the above friend...
  // META-FIXME there is no 'friend' keyword around here. Is this obsolete?
  protected:
  #endif

    #ifndef EIGEN_PARSED_BY_DOXYGEN
    /** \internal used by allowAligned() */
    EIGEN_DEVICE_FUNC

CommaInitializer.h (lines 45-56):

  }

  /* Copy/Move constructor which transfers ownership. This is crucial in
   * absence of return value optimization to avoid assertions during destruction. */
  // FIXME in C++11 mode this could be replaced by a proper RValue constructor
  EIGEN_DEVICE_FUNC
  inline CommaInitializer(const CommaInitializer& o)
  : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
    // Mark original object as finished. In absence of R-value references we need to const_cast:
    const_cast<CommaInitializer&>(o).m_row = m_xpr.rows();
    const_cast<CommaInitializer&>(o).m_col = m_xpr.cols();
    const_cast<CommaInitializer&>(o).m_currentBlockRows = 0;

CoreEvaluators.h (lines 93-104):

  EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {}
};


// TODO: Think about const-correctness
template<typename T>
struct evaluator<const T>
  : evaluator<T>
{
  EIGEN_DEVICE_FUNC
  explicit evaluator(const T& xpr) : evaluator<T>(xpr) {}
};

CoreEvaluators.h (lines 107-116):

template<typename ExpressionType>
struct evaluator_base : public noncopyable
{
  // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
  typedef traits<ExpressionType> ExpressionTraits;

  enum {
    Alignment = 0
  };
};

CoreEvaluators.h (lines 798-809):

};

// -------------------- Map --------------------

// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ?
// but that might complicate template specialization
template<typename Derived, typename PlainObjectType>
struct mapbase_evaluator;

template<typename Derived, typename PlainObjectType>
struct mapbase_evaluator : evaluator_base<Derived>
{

CoreEvaluators.h (lines 897-908):

  : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType>
{
  typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
  typedef typename XprType::Scalar Scalar;
  // TODO: should check for smaller packet types once we can handle multi-sized packet types
  typedef typename packet_traits<Scalar>::type PacketScalar;

  enum {
    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
                             ? int(PlainObjectType::InnerStrideAtCompileTime)
                             : int(StrideType::InnerStrideAtCompileTime),
    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0

CoreEvaluators.h (lines 953-964):

  : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
{
  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
  typedef typename XprType::Scalar Scalar;
  // TODO: should check for smaller packet types once we can handle multi-sized packet types
  typedef typename packet_traits<Scalar>::type PacketScalar;

  enum {
    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,

    RowsAtCompileTime = traits<XprType>::RowsAtCompileTime,
    ColsAtCompileTime = traits<XprType>::ColsAtCompileTime,

CoreEvaluators.h (lines 1105-1116):

  const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
  const variable_if_dynamic<Index, InnerPanel ? Dynamic : 0> m_linear_offset;
};

// TODO: This evaluator does not actually use the child evaluator;
// all action is via the data() as returned by the Block expression.

template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true>
  : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>,
                      typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
{

CoreEvaluators.h (lines 1119-1129):

  EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
    : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
  {
    // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime
    eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
  }
};


// -------------------- Select --------------------
// NOTE shall we introduce a ternary_evaluator?

CoreEvaluators.h (lines 1128-1138):

// -------------------- Select --------------------
// NOTE shall we introduce a ternary_evaluator?

// TODO enable vectorization for Select
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
  : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
{
  typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
  enum {
    CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost

CoreEvaluators.h (lines 1448-1457):

    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,

    // let's enable LinearAccess only with vectorization because of the product overhead
    // FIXME enable DirectAccess with negative strides?
    Flags0 = evaluator<ArgType>::Flags,
    LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
                  || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1))
                 ? LinearAccessBit : 0,

    Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),

CoreEvaluators.h (lines 1518-1529):

  template<int LoadMode, typename PacketType>
  EIGEN_STRONG_INLINE
  void writePacket(Index row, Index col, const PacketType& x)
  {
    // FIXME we could factorize some code with packet(i,j)
    enum {
      PacketSize = unpacket_traits<PacketType>::size,
      OffsetRow  = ReverseRow && IsColMajor ? PacketSize : 1,
      OffsetCol  = ReverseCol && IsRowMajor ? PacketSize : 1
    };
    typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
    m_argImpl.template writePacket<LoadMode>(

CoreIterators.h (lines 26-37):

  * \brief An InnerIterator allows to loop over the element of any matrix expression.
  *
  * \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed.
  *
  * TODO: add a usage example
  */
template<typename XprType>
class InnerIterator
{
protected:
  typedef internal::inner_iterator_selector<XprType, typename internal::evaluator_traits<XprType>::Kind> IteratorType;
  typedef internal::evaluator<XprType> EvaluatorType;

DenseCoeffsBase.h (lines 508-519):

    {
      return derived().outerStride();
    }

    // FIXME shall we remove it ?
    inline Index stride() const
    {
      return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
    }

    /** \returns the pointer increment between two consecutive rows.
      *

DenseCoeffsBase.h (lines 582-593):

    {
      return derived().outerStride();
    }

    // FIXME shall we remove it ?
    inline Index stride() const
    {
      return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
    }

    /** \returns the pointer increment between two consecutive rows.
      *

EigenBase.h (lines 36-47):

    * \sa StorageIndex, \ref TopicPreprocessorDirectives.
    */
  typedef Eigen::Index Index;

  // FIXME is it needed?
  typedef typename internal::traits<Derived>::StorageKind StorageKind;

  /** \returns a reference to the derived object */
  EIGEN_DEVICE_FUNC
  Derived& derived() { return *static_cast<Derived*>(this); }
  /** \returns a const reference to the derived object */
  EIGEN_DEVICE_FUNC

GeneralProduct.h (lines 84-94):

/* The following allows to select the kind of product at compile time
 * based on the three dimensions of the product.
 * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
// FIXME I'm not sure the current mapping is the ideal one.
template<int M, int N>  struct product_type_selector<M,N,1>              { enum { ret = OuterProduct }; };
template<int M>         struct product_type_selector<M, 1, 1>            { enum { ret = LazyCoeffBasedProductMode }; };
template<int N>         struct product_type_selector<1, N, 1>            { enum { ret = LazyCoeffBasedProductMode }; };
template<int Depth>     struct product_type_selector<1,    1,    Depth>  { enum { ret = InnerProduct }; };
template<>              struct product_type_selector<1,    1,    1>      { enum { ret = InnerProduct }; };
template<>              struct product_type_selector<Small,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
template<>              struct product_type_selector<1,    Small,Small>  { enum { ret = CoeffBasedProductMode }; };

GeneralProduct.h (lines 116-127):

/***********************************************************************
*  Implementation of Inner Vector Vector Product
***********************************************************************/

// FIXME : maybe the "inner product" could return a Scalar
// instead of a 1x1 matrix ??
// Pro: more natural for the user
// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
// product ends up to a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);

/***********************************************************************

GeneralProduct.h (lines 224-235):

    // make sure Dest is a compile-time vector type (bug 1166)
    typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on, the other hand it is good for the cache to pack the vector anyways...
      EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
      MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
    };

    typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;

GeneralProduct.h:

313
314
315
316
317
318
319
320
321
322
323
324
    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
                                  * RhsBlasTraits::extractScalarFactor(rhs);

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // On the other hand, it is good for the cache to pack the vector anyway...
      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
    };

    gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;

    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),

GeneralProduct.h:

350
351
352
353
354
355
356
357
358
359
360
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
    // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
    typename nested_eval<Rhs,1>::type actual_rhs(rhs);
    const Index size = rhs.rows();
    for(Index k=0; k<size; ++k)
      dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
  }
};

GenericPacketMath.h:

350
351
352
353
354
355
356
357
358
359
360
/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
{
  // FIXME: uncomment the following in case we drop the internal imag and real functions.
//   using std::imag;
//   using std::real;
  return Packet(imag(a),real(a));
}

/**************************
* Special math functions

GlobalFunctions.h:

181
182
183
184
185
186
187
    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2,scalar_abs2_op)
  }
}

// TODO: cleanly disable those functions that are not supported on Array (numext::real_ref, internal::random, internal::isApprox...)

#endif // EIGEN_GLOBAL_FUNCTIONS_H

IO.h:

56
57
58
59
60
61
62
63
64
65
66
67
    const std::string& _matPrefix="", const std::string& _matSuffix="")
  : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator),
    rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
  {
    // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
    // don't add rowSpacer if columns are not to be aligned
    if((flags & DontAlignCols))
      return;
    int i = int(matSuffix.length())-1;
    while (i>=0 && matSuffix[i]!='\n')
    {
      rowSpacer += ' ';
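
The alignment logic above is driven by the IOFormat constructor arguments quoted in the excerpt; a typical usage sketch (the argument values are only illustrative):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix2d m;
  m << 1, 2,
       3.5, 4;

  // precision, flags, coeffSeparator, rowSeparator, rowPrefix, rowSuffix, matPrefix, matSuffix
  Eigen::IOFormat fmt(3, 0, ", ", "\n", "[", "]", "(", ")");
  std::cout << m.format(fmt) << std::endl;
  return 0;
}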

MathFunctions.h:

10
11
12
13
14
15
16
17
18
19
20
21
#ifndef EIGEN_MATHFUNCTIONS_H
#define EIGEN_MATHFUNCTIONS_H

// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html
// TODO this should better be moved to NumTraits
#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L


namespace Eigen {

// On WINCE, std::abs is defined for int only, so let's define our own overloads:
// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too.
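
For context, the macro is used directly today, e.g. std::sin(EIGEN_PI / 6). What the TODO suggests, roughly, is a traits-style accessor; the sketch below is purely hypothetical (Eigen::NumTraits has no such member, and PiOf is an illustrative name) and only shows the idea:

#include <Eigen/Core>

// Hypothetical helper, not an existing Eigen API: expose pi per scalar type.
template <typename T>
struct PiOf
{
  static T value() { return static_cast<T>(EIGEN_PI); }
};

int main()
{
  double x = PiOf<double>::value();  // 3.14159265358979...
  float  y = PiOf<float>::value();   // rounded to float precision
  return (x > 3.0 && y > 3.0f) ? 0 : 1;
}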

Matrix.h:

44
45
46
47
48
49
50
51
52
53
54
55
    Options = _Options,
    InnerStrideAtCompileTime = 1,
    OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,

    // FIXME, the following flag is only used to define NeedsToAlign in PlainObjectBase
    EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
    Alignment = actual_alignment
  };
};
}

/** \class Matrix

Matrix.h:

261
262
263
264
265
266
267
268
269
270
271
272
      Base::_check_template_params();
      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
    }

    // FIXME is it still needed
    EIGEN_DEVICE_FUNC
    explicit Matrix(internal::constructor_without_unaligned_array_assert)
      : Base(internal::constructor_without_unaligned_array_assert())
    { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }

#if EIGEN_HAS_RVALUE_REFERENCES
    EIGEN_DEVICE_FUNC

MatrixBase.h:

297
298
299
300
301
302
303
304
305
306
307
308
    { return cwiseNotEqual(other).any(); }

    NoAlias<Derived,Eigen::MatrixBase > noalias();

    // TODO forceAlignedAccess is temporarily disabled
    // Need to find a nicer workaround.
    inline const Derived& forceAlignedAccess() const { return derived(); }
    inline Derived& forceAlignedAccess() { return derived(); }
    template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
    template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }

    EIGEN_DEVICE_FUNC Scalar trace() const;

PlainObjectBase.h:

487
488
489
490
491
492
493
494
495
496
497
498
//       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
    }

#ifndef EIGEN_PARSED_BY_DOXYGEN
    // FIXME is it still needed ?
    /** \internal */
    EIGEN_DEVICE_FUNC
    explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
      : m_storage(internal::constructor_without_unaligned_array_assert())
    {
//       _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
    }

PlainObjectBase.h:

552
553
554
555
556
557
558
559
560
561
562
563
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)
    {
      _check_template_params();
      // FIXME this does not automatically transpose vectors if necessary
      resize(other.rows(), other.cols());
      other.evalTo(this->derived());
    }

  public:

    /** \brief Copies the generic expression \a other into *this.

Product.h:

38
39
40
41
42
43
44
45
46
47
48
49
    ColsAtCompileTime    = RhsTraits::ColsAtCompileTime,
    MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,

    // FIXME: only needed by GeneralMatrixMatrixTriangular
    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),

    // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
    Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit
          : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
          : (   ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
             || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit

ProductEvaluators.h:

35
36
37
38
39
40
41
42
43
44
45
46
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
};

// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
// TODO we should apply that rule only if that's really helpful
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
                                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
                                               const Product<Lhs, Rhs, DefaultProduct> > >
{
  static const bool value = true;
};
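
In user code the rewrite means the two forms below compute the same matrix; folding the scalar into one operand lets the expression stay on the regular product path instead of scaling the product result afterwards (a small illustration, not a benchmark):

#include <Eigen/Dense>
using Eigen::MatrixXd;

int main()
{
  MatrixXd A = MatrixXd::Random(4, 4), B = MatrixXd::Random(4, 4);

  MatrixXd C1 = 2.0 * (A * B);   // written as scalar * (A * B)
  MatrixXd C2 = (2.0 * A) * B;   // the form the rule above rewrites it into

  return C1.isApprox(C2) ? 0 : 1;
}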

ProductEvaluators.h:

108
109
110
111
112
113
114
115
116
117
118
119
    : m_result(xpr.rows(), xpr.cols())
  {
    ::new (static_cast<Base*>(this)) Base(m_result);

// FIXME shall we handle nested_eval here?
// If so, then we must take care of removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
//     typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
//     typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
//     typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
//     typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
//
//     const LhsNested lhs(xpr.lhs());

ProductEvaluators.h:

128
129
130
131
132
133
134
135
136
137
138
139
  PlainObject m_result;
};

// The following three shortcuts are enabled only if the scalar types match exactly.
// TODO: we could enable them for different scalar types when the product is not vectorized.

// Dense = Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{
  typedef Product<Lhs,Rhs,Options> SrcXprType;

ProductEvaluators.h:

143
144
145
146
147
148
149
150
151
152
153
154
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);
    // FIXME shall we handle nested_eval here?
    generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
  }
};

// Dense += Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,

ProductEvaluators.h:

158
159
160
161
162
163
164
165
166
167
168
169
  static EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
  {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    // FIXME shall we handle nested_eval here?
    generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
  }
};

// Dense -= Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,

ProductEvaluators.h:

173
174
175
176
177
178
179
180
181
182
183
184
  static EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
  {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    // FIXME shall we handle nested_eval here?
    generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
  }
};


// Dense ?= scalar * Product
// TODO we should apply that rule if that's really helpful

ProductEvaluators.h:

180
181
182
183
184
185
186
187
188
189
190
191
};


// Dense ?= scalar * Product
// TODO we should apply that rule if that's really helpful
// for instance, this is not good for inner products
template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
                                           const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>
{
  typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
                        const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,

ProductEvaluators.h:

198
199
200
201
202
203
204
205
206
207
208
};

//----------------------------------------
// Catch "Dense ?= xpr + Product<>" expression to save one temporary
// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct

template<typename OtherXpr, typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
                                               const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
  static const bool value = true;
};
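
The rule targets expressions of the form shown below, where the product can be accumulated straight into the destination instead of being evaluated into its own temporary first (an illustration of the targeted pattern, not a claim about the exact code path taken):

#include <Eigen/Dense>
using Eigen::MatrixXd;

int main()
{
  MatrixXd A = MatrixXd::Random(8, 8), B = MatrixXd::Random(8, 8);
  MatrixXd D = MatrixXd::Random(8, 8);

  // "dense = xpr + product" -- the pattern caught by the specialization above.
  MatrixXd C = D + A * B;

  // Equivalent two-step form that makes the intended evaluation explicit.
  MatrixXd C2 = D;
  C2.noalias() += A * B;

  return C.isApprox(C2) ? 0 : 1;
}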

ProductEvaluators.h:

272
273
274
275
276
277
278
279
280
281
282
283
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
{
  evaluator<Rhs> rhsEval(rhs);
  typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
  // FIXME not very good if rhs is real and lhs complex while alpha is real too
  const Index cols = dst.cols();
  for (Index j=0; j<cols; ++j)
    func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
}

// Row major result

ProductEvaluators.h:

273
274
275
276
277
278
279
280
281
282
283
284
{
  evaluator<Rhs> rhsEval(rhs);
  typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
  // FIXME not very good if rhs is real and lhs complex while alpha is real too
  const Index cols = dst.cols();
  for (Index j=0; j<cols; ++j)
    func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
}

// Row major result
template<typename Dst, typename Lhs, typename Rhs, typename Func>

ProductEvaluators.h:

285
286
287
288
289
290
291
292
293
294
295
296
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
{
  evaluator<Lhs> lhsEval(lhs);
  typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
  // FIXME not very good if lhs is real and rhs complex while alpha is real too
  const Index rows = dst.rows();
  for (Index i=0; i<rows; ++i)
    func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
}

template<typename Lhs, typename Rhs>

ProductEvaluators.h:

286
287
288
289
290
291
292
293
294
295
296
297
{
  evaluator<Lhs> lhsEval(lhs);
  typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
  // FIXME not very good if lhs is real and rhs complex while alpha is real too
  const Index rows = dst.rows();
  for (Index i=0; i<rows; ++i)
    func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
}

template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
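
The per-column loop in outer_product_selector_run above is exactly what an outer product expands to; in user code the same computation is written as a column vector times a row vector (small sketch):

#include <Eigen/Dense>
using Eigen::MatrixXd;
using Eigen::VectorXd;

int main()
{
  VectorXd u = VectorXd::Random(3), v = VectorXd::Random(4);

  // Outer product: column j of M is v(j) * u, matching the loop above.
  MatrixXd M = u * v.transpose();

  MatrixXd ref(3, 4);
  for (int j = 0; j < 4; ++j)
    ref.col(j) = v(j) * u;

  return M.isApprox(ref) ? 0 : 1;
}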

ProductEvaluators.h:

298
299
300
301
302
303
304
305
306
307
308
309
{
  template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
  struct set  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived()  = src; } };
  struct add  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
  struct sub  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
  struct adds {
    Scalar m_scale;
    explicit adds(const Scalar& s) : m_scale(s) {}
    template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {

ProductEvaluators.h:

523
524
525
526
527
528
529
530
531
532
533
534
                    : (bool(RhsRowMajor) && !CanVectorizeLhs),

    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
          | (EvalToRowMajor ? RowMajorBit : 0)
          // TODO enable vectorization for mixed types
          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
          | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),

    LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
    RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),

    Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)

ProductEvaluators.h:

553
554
555
556
557
558
559
560
561
562
563
564
  }

  /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
   * which is why we don't set the LinearAccessBit.
   * TODO: this seems possible when the result is a vector
   */
  EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
  {
    const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
    const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
  }

ProductEvaluators.h:

588
589
590
591
592
593
594
595
596
597
598
  LhsEtorType m_lhsImpl;
  RhsEtorType m_rhsImpl;

  // TODO: Get rid of m_innerDim if known at compile time
  Index m_innerDim;
};

template<typename Lhs, typename Rhs>
struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
  : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
{

ProductEvaluators.h:

779
780
781
782
783
784
785
786
787
788
789
790
    _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
    _ScalarAccessOnDiag =  !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
                           ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
    _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
    // FIXME currently we need the same types, but in the future the next rule should be the one
    //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
    _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
    _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
    Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
    Alignment = evaluator<MatrixType>::Alignment,

    AsScalarProduct =     (DiagonalType::SizeAtCompileTime==1)

ProductEvaluators.h:

861
862
863
864
865
866
867
868
869
870
871
872
#ifndef __CUDACC__
  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
  {
    // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
    // See also similar calls below.
    return this->template packet_impl<LoadMode,PacketType>(row,col, row,
                                 typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
  }

  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE PacketType packet(Index idx) const

ProductEvaluators.h:

940
941
942
943
944
945
946
947
948
949
950
951
    static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
    {
      MatrixType mat(xpr);
      const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
      // FIXME we need an is_same for expressions that ignores constness. For instance
      // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
      //if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
      if(is_same_dense(dst, mat))
      {
        // apply the permutation inplace
        Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
        mask.fill(false);

ProductEvaluators.h:

1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
/***************************************************************************
* Products with transpositions matrices
***************************************************************************/

// FIXME could we unify Transpositions and Permutation into a single "shape"??

/** \internal
  * \class transposition_matrix_product
  * Internal helper class implementing the product between a permutation matrix and a matrix.
  */
template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
struct transposition_matrix_product

Redux.h:

14
15
16
17
18
19
20
21
22
23
24
namespace Eigen {

namespace internal {

// TODO
//  * implement other kind of vectorization
//  * factorize code

/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/

Redux.h:

344
345
346
347
348
349
350
351
352
353
354
  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
    CoeffReadCost = evaluator<XprType>::CoeffReadCost,
    Alignment = evaluator<XprType>::Alignment
  };

Replicate.h:

28
29
30
31
32
33
34
35
36
37
38
39
                      : RowFactor * MatrixType::RowsAtCompileTime,
    ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic
                      ? Dynamic
                      : ColFactor * MatrixType::ColsAtCompileTime,
   //FIXME we don't propagate the max sizes !!!
    MaxRowsAtCompileTime = RowsAtCompileTime,
    MaxColsAtCompileTime = ColsAtCompileTime,
    IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
               : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
               : (MatrixType::Flags & RowMajorBit) ? 1 : 0,

    // FIXME enable DirectAccess with negative strides?

Replicate.h:

35
36
37
38
39
40
41
42
43
44
45
46
    IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
               : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
               : (MatrixType::Flags & RowMajorBit) ? 1 : 0,

    // FIXME enable DirectAccess with negative strides?
    Flags = IsRowMajor ? RowMajorBit : 0
  };
};
}

/**
  * \class Replicate

ReturnByValue.h:

31
32
33
34
35
36
37
38
39
40
41
42
/* The ReturnByValue object doesn't even have a coeff() method.
 * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix.
 * So internal::nested always gives the plain return matrix type.
 *
 * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??
 * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators
 */
template<typename Derived,int n,typename PlainObject>
struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
{
  typedef typename traits<Derived>::ReturnType type;
};

ReturnByValue.h:

88
89
90
91
92
93
94
95
96
97
98
99
namespace internal {

// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that
// when a ReturnByValue expression is assigned, the evaluator is not constructed.
// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world

template<typename Derived>
struct evaluator<ReturnByValue<Derived> >
  : public evaluator<typename internal::traits<Derived>::ReturnType>
{
  typedef ReturnByValue<Derived> XprType;
  typedef typename internal::traits<Derived>::ReturnType PlainObject;

SelfAdjointView.h:

264
265
266
267
268
269
270
271
272
273
274
275
// selfadjoint to dense matrix

namespace internal {

// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
//      in the future selfadjoint-ness should be defined by the expression traits
//      such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
template<typename MatrixType, unsigned int Mode>
struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
{
  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
  typedef SelfAdjointShape Shape;

SelfCwiseBinaryOp.h:

11
12
13
14
15
16
17
18
19
20
21
22
#define EIGEN_SELFCWISEBINARYOP_H

namespace Eigen {

// TODO generalize the scalar type of 'other'

template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
  return derived();
}

SolveTriangular.h:

57
58
59
60
61
62
63
64
65
66
67
68
  static void run(const Lhs& lhs, Rhs& rhs)
  {
    ActualLhsType actualLhs = LhsProductTraits::extract(lhs);

    // FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1

    bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;

    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
                                                  (useRhsDirectly ? rhs.data() : 0));

    if(!useRhsDirectly)

StableNorm.h:

43
44
45
46
47
48
49
50
51
52
53
54
  {
    scale = maxCoeff;
  }

  // TODO if the maxCoeff is much much smaller than the current scale,
  // then we can neglect this sub vector
  if(scale>Scalar(0)) // if scale==0, then bl is 0
    ssq += (bl*invScale).squaredNorm();
}

template<typename Derived>
inline typename NumTraits<typename traits<Derived>::Scalar>::Real
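
The scale/ssq pair above is the usual scaled sum-of-squares recurrence for accumulating a 2-norm without overflow or underflow. A standalone sketch of the same idea, independent of Eigen's blocking (stable_norm is just an illustrative helper name):

#include <cmath>
#include <vector>

// Invariant: sum of squares seen so far == scale^2 * ssq, with every |x_i| <= scale,
// so ssq itself never overflows even when the plain sum of squares would.
double stable_norm(const std::vector<double>& x)
{
  double scale = 0.0, ssq = 1.0;
  for (double v : x)
  {
    double a = std::abs(v);
    if (a == 0.0) continue;
    if (a > scale)
    {
      ssq = 1.0 + ssq * (scale / a) * (scale / a);
      scale = a;
    }
    else
    {
      ssq += (a / scale) * (a / scale);
    }
  }
  return scale * std::sqrt(ssq);
}

int main()
{
  std::vector<double> x = {3e200, 4e200};   // naive squaring would overflow
  return std::abs(stable_norm(x) - 5e200) < 1e186 ? 0 : 1;
}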

Swap.h:

49
50
51
52
53
54
55
56
57
58
59
60
    const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode,PacketType>(index));
    m_dst.template writePacket<StoreMode>(index,tmp);
  }

  // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
  template<int StoreMode, int LoadMode, typename PacketType>
  void assignPacketByOuterInner(Index outer, Index inner)
  {
    Index row = Base::rowIndexByOuterInner(outer, inner);
    Index col = Base::colIndexByOuterInner(outer, inner);
    assignPacket<StoreMode,LoadMode,PacketType>(row, col);
  }

Transpose.h:

133
134
135
136
137
138
139
140
141
142
143
    EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); }

    // FIXME: shall we keep the const version of coeffRef?
    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index rowId, Index colId) const
    {
      return derived().nestedExpression().coeffRef(colId, rowId);
    }

    EIGEN_DEVICE_FUNC

Transpose.h:

231
232
233
234
235
236
237
238
239
240
241
242
    m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
  }
};

// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only.
template<typename MatrixType>
struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize
  static void run(MatrixType& m) {
    typedef typename MatrixType::Scalar Scalar;
    typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
    const Index PacketSize = internal::packet_traits<Scalar>::size;
    const Index Alignment = internal::evaluator<MatrixType>::Alignment;

Transpositions.h:

82
83
84
85
86
87
88
89
90
91
92
93
      for(StorageIndex i = 0; i < indices().size(); ++i)
        coeffRef(i) = i;
    }

    // FIXME: do we want such methods ?
    // might be useful when the target matrix expression is complex, e.g.:
    // object.matrix().block(..,..,..,..) = trans * object.matrix().block(..,..,..,..);
    /*
    template<typename MatrixType>
    void applyForwardToRows(MatrixType& mat) const
    {
      for(Index k=0 ; k<size() ; ++k)

TriangularMatrix.h:

550
551
552
553
554
555
556
557
558
559
560
561
* Implementation of triangular evaluation/assignment
***************************************************************************/

#ifndef EIGEN_PARSED_BY_DOXYGEN
// FIXME should we keep that possibility
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
inline TriangularView<MatrixType, Mode>&
TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other)
{
  internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
  return derived();

TriangularMatrix.h:

560
561
562
563
564
565
566
567
568
569
570
  internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
  return derived();
}

// FIXME should we keep that possibility
template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived>
void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other)
{
  internal::call_assignment_no_alias(derived(), other.template triangularView<Mode>());
}

TriangularMatrix.h:

698
699
700
701
702
703
704
705
706
707
708
namespace internal {


// TODO currently a triangular expression has the form TriangularView<.,.>
//      in the future triangular-ness should be defined by the expression traits
//      such that Transpose<TriangularView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
template<typename MatrixType, unsigned int Mode>
struct evaluator_traits<TriangularView<MatrixType,Mode> >
{
  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
  typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;

TriangularMatrix.h:

851
852
853
854
855
856
857
858
859
860
861
template<typename Kernel, unsigned int Mode, int UnrollCount, bool SetOpposite>
struct triangular_assignment_loop
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;

  enum {
    col = (UnrollCount-1) / DstXprType::RowsAtCompileTime,
    row = (UnrollCount-1) % DstXprType::RowsAtCompileTime
  };

TriangularMatrix.h:

886
887
888
889
890
891
892
893
894
895
896
897
};



// TODO: experiment with a recursive assignment procedure splitting the current
//       triangular part into one rectangular and two triangular parts.


template<typename Kernel, unsigned int Mode, bool SetOpposite>
struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
{
  typedef typename Kernel::Scalar Scalar;

MatrixBaseEigenvalues.h:

122
123
124
125
126
127
128
129
130
131
132
133
MatrixBase<Derived>::operatorNorm() const
{
  using std::sqrt;
  typename Derived::PlainObject m_eval(derived());
  // FIXME if it is really guaranteed that the eigenvalues are already sorted,
  // then we don't need to compute a maxCoeff() here, comparing the 1st and last ones is enough.
  return sqrt((m_eval*m_eval.adjoint())
                 .eval()
             .template selfadjointView<Lower>()
             .eigenvalues()
             .maxCoeff()
             );
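
The FIXME above concerns only an internal micro-optimization; from the caller's side the quantity being computed is the matrix 2-norm, i.e. the largest singular value (usage sketch):

#include <Eigen/Dense>
#include <cmath>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 3);

  // operatorNorm(): sqrt of the largest eigenvalue of A*A^T, as computed above.
  double n1 = A.operatorNorm();

  // Cross-check against the largest singular value.
  double n2 = A.jacobiSvd().singularValues()(0);

  return std::abs(n1 - n2) < 1e-12 ? 0 : 1;
}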

RealSchur.h:

352
353
354
355
356
357
358
359
360
361
362
363
template<typename MatrixType>
inline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT()
{
  const Index size = m_matT.cols();
  // FIXME to be efficient the following would require a triangular reduction code
  // Scalar norm = m_matT.upper().cwiseAbs().sum()
  //               + m_matT.bottomLeftCorner(size-1,size-1).diagonal().cwiseAbs().sum();
  Scalar norm(0);
  for (Index j = 0; j < size; ++j)
    norm += m_matT.col(j).segment(0, (std::min)(size,j+2)).cwiseAbs().sum();
  return norm;
}

SelfAdjointEigenSolver.h:

449
450
451
452
453
454
455
456
457
458
459
460
template<typename MatrixType>
SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
::computeFromTridiagonal(const RealVectorType& diag, const SubDiagonalType& subdiag , int options)
{
  //TODO : Add an option to scale the values beforehand
  bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;

  m_eivalues = diag;
  m_subdiag = subdiag;
  if (computeEigenvectors)
  {
    m_eivec.setIdentity(diag.size(), diag.size());

SelfAdjointEigenSolver.h:

524
525
526
527
528
529
530
531
532
533
534
535
  else
    info = NoConvergence;

  // Sort eigenvalues and corresponding vectors.
  // TODO make the sort optional ?
  // TODO use a better sort algorithm !!
  if (info == Success)
  {
    for (Index i = 0; i < n-1; ++i)
    {
      Index k;
      diag.segment(i,n-i).minCoeff(&k);

SelfAdjointEigenSolver.h:

525
526
527
528
529
530
531
532
533
534
535
536
    info = NoConvergence;

  // Sort eigenvalues and corresponding vectors.
  // TODO make the sort optional ?
  // TODO use a better sort algorithm !!
  if (info == Success)
  {
    for (Index i = 0; i < n-1; ++i)
    {
      Index k;
      diag.segment(i,n-i).minCoeff(&k);
      if (k > 0)
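
Both TODOs above concern the selection-sort pass, which is always executed and costs O(n^2) minCoeff scans. One common alternative, sketched here only as an illustration (sort_eigenpairs is not an Eigen function), is to sort an index permutation and gather:

#include <Eigen/Dense>
#include <algorithm>
#include <numeric>
#include <vector>

// Sort eigenvalues (and the matching eigenvector columns) in increasing order
// via an index sort, instead of the selection-sort pass shown above.
void sort_eigenpairs(Eigen::VectorXd& eivals, Eigen::MatrixXd& eivecs)
{
  const int n = static_cast<int>(eivals.size());
  std::vector<int> idx(n);
  std::iota(idx.begin(), idx.end(), 0);
  std::sort(idx.begin(), idx.end(),
            [&](int a, int b) { return eivals(a) < eivals(b); });

  Eigen::VectorXd sortedVals(n);
  Eigen::MatrixXd sortedVecs(eivecs.rows(), n);
  for (int i = 0; i < n; ++i)
  {
    sortedVals(i) = eivals(idx[i]);
    sortedVecs.col(i) = eivecs.col(idx[i]);
  }
  eivals.swap(sortedVals);
  eivecs.swap(sortedVecs);
}

int main()
{
  Eigen::Matrix3d A;
  A << 2, 1, 0,
       1, 3, 1,
       0, 1, 4;
  Eigen::SelfAdjointEigenSolver<Eigen::Matrix3d> es(A);
  Eigen::VectorXd vals = es.eigenvalues();
  Eigen::MatrixXd vecs = es.eigenvectors();
  sort_eigenpairs(vals, vecs);   // already sorted here, so this is a no-op check
  return 0;
}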

SelfAdjointEigenSolver.h:

635
636
637
638
639
640
641
642
643
644
645
646
    VectorType& eivals = solver.m_eivalues;

    // Shift the matrix to the mean eigenvalue and map the matrix coefficients to [-1:1] to avoid over- and underflow.
    Scalar shift = mat.trace() / Scalar(3);
    // TODO Avoid this copy. Currently it is necessary to suppress bogus values when determining maxCoeff and for computing the eigenvectors later
    MatrixType scaledMat = mat.template selfadjointView<Lower>();
    scaledMat.diagonal().array() -= shift;
    Scalar scale = scaledMat.cwiseAbs().maxCoeff();
    if(scale > 0) scaledMat /= scale;   // TODO for scale==0 we could save the remaining operations

    // compute the eigenvalues
    computeRoots(scaledMat,eivals);
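
The shift-and-scale above is a standard conditioning step: subtract the mean eigenvalue (trace/3) from the diagonal, rescale so all coefficients lie in [-1, 1], and undo both afterwards, since lambda(A) = scale * lambda(B) + shift. A standalone illustration of that transform:

#include <Eigen/Dense>

int main()
{
  Eigen::Matrix3d A;
  A << 4, 1, 0,
       1, 3, 1,
       0, 1, 2;

  // Shift by the mean eigenvalue and scale coefficients into [-1, 1].
  double shift = A.trace() / 3.0;
  Eigen::Matrix3d B = A;
  B.diagonal().array() -= shift;
  double scale = B.cwiseAbs().maxCoeff();
  if (scale > 0) B /= scale;

  // Eigenvalues of A are recovered as scale * lambda(B) + shift.
  Eigen::SelfAdjointEigenSolver<Eigen::Matrix3d> esA(A), esB(B);
  Eigen::Vector3d recovered = (scale * esB.eigenvalues().array() + shift).matrix();

  return recovered.isApprox(esA.eigenvalues()) ? 0 : 1;
}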

SelfAdjointEigenSolver.h:

855
856
857
858
859
860
861
862
863
864
865
    // apply the givens rotation to the unit matrix Q = Q * G
    if (matrixQ)
    {
      // FIXME if StorageOrder == RowMajor this operation is not very efficient
      Map<Matrix<Scalar,Dynamic,Dynamic,StorageOrder> > q(matrixQ,n,n);
      q.applyOnTheRight(k,k+1,rot);
    }
  }
}

} // end namespace internal

Homogeneous.h:

266
267
268
269
270
271
272
273
274
275
276
277
  EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }

  template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
  {
    // FIXME investigate how to allow lazy evaluation of this product when possible
    dst = Block<const LhsMatrixTypeNested,
              LhsMatrixTypeNested::RowsAtCompileTime,
              LhsMatrixTypeNested::ColsAtCompileTime==Dynamic?Dynamic:LhsMatrixTypeNested::ColsAtCompileTime-1>
            (m_lhs,0,0,m_lhs.rows(),m_lhs.cols()-1) * m_rhs;
    dst += m_lhs.col(m_lhs.cols()-1).rowwise()
            .template replicate<MatrixType::ColsAtCompileTime>(m_rhs.cols());
  }

Homogeneous.h:

304
305
306
307
308
309
310
311
312
313
314
315
  EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }

  template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
  {
    // FIXME investigate how to allow lazy evaluation of this product when possible
    dst = m_lhs * Block<const RhsNested,
                        RhsNested::RowsAtCompileTime==Dynamic?Dynamic:RhsNested::RowsAtCompileTime-1,
                        RhsNested::ColsAtCompileTime>
            (m_rhs,0,0,m_rhs.rows()-1,m_rhs.cols());
    dst += m_rhs.row(m_rhs.rows()-1).colwise()
            .template replicate<MatrixType::RowsAtCompileTime>(m_lhs.rows());
  }

Homogeneous.h:

430
431
432
433
434
435
436
437
438
439
440
441
    homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, Lhs>(lhs, rhs.nestedExpression()).evalTo(dst);
  }
};

// TODO: the following specialization is to address a regression from 3.2 to 3.3
// In the future, this path should be optimized.
template<typename Lhs, typename RhsArg, int ProductTag>
struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, TriangularShape, HomogeneousShape, ProductTag>
{
  template<typename Dest>
  static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
  {

Hyperplane.h:

118
119
120
121
122
123
124
125
126
127
128
129
  /** Constructs a hyperplane passing through the parametrized line \a parametrized.
    * If the dimension of the ambient space is greater than 2, the hyperplane is not uniquely determined,
    * so an arbitrary choice is made.
    */
  // FIXME to be consistent with the rest this could be implemented as a static Through function ??
  EIGEN_DEVICE_FUNC explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
  {
    normal() = parametrized.direction().unitOrthogonal();
    offset() = -parametrized.origin().dot(normal());
  }

  EIGEN_DEVICE_FUNC ~Hyperplane() {}

Quaternion.h:

662
663
664
665
666
667
668
669
670
671
672
673
  */
template <class Derived>
EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
{
  // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite()  ??
  Scalar n2 = this->squaredNorm();
  if (n2 > Scalar(0))
    return Quaternion<Scalar>(conjugate().coeffs() / n2);
  else
  {
    // return an invalid result to flag the error
    return Quaternion<Scalar>(Coefficients::Zero());
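
A usage note on the code above: for a unit quaternion the squared norm is 1, so conjugate() already equals the inverse and skips the division (small sketch):

#include <Eigen/Geometry>

int main()
{
  Eigen::Quaterniond q(Eigen::AngleAxisd(0.3, Eigen::Vector3d::UnitZ()));

  Eigen::Quaterniond qi = q.inverse();     // general case: conjugate / squaredNorm
  Eigen::Quaterniond qc = q.conjugate();   // equal to the inverse here, since |q| == 1

  return qi.isApprox(qc) ? 0 : 1;
}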

Scaling.h:

67
68
69
70
71
72
73
74
75
76
77
78
    return res;
  }

  /** Concatenates a uniform scaling and a linear transformation matrix */
  // TODO returns an expression
  template<typename Derived>
  inline typename internal::plain_matrix_type<Derived>::type operator* (const MatrixBase<Derived>& other) const
  { return other * m_factor; }

  template<typename Derived,int Dim>
  inline Matrix<Scalar,Dim,Dim> operator*(const RotationBase<Derived,Dim>& r) const
  { return r.toRotationMatrix() * m_factor; }

BlockHouseholder.h:

62
63
64
65
66
67
68
69
70
71
72
    {
      triFactor.row(i).tail(rt).noalias() = -hCoeffs(i) * vectors.col(i).tail(rs).adjoint()
                                                        * vectors.bottomRightCorner(rs, rt).template triangularView<UnitLower>();

      // FIXME add .noalias() once the triangular product can work inplace
      triFactor.row(i).tail(rt) = triFactor.row(i).tail(rt) * triFactor.bottomRightCorner(rt,rt).template triangularView<Upper>();

    }
    triFactor(i,i) = hCoeffs(i);
  }
}

BlockHouseholder.h:

89
90
91
92
93
94
95
96
97
98
99
  // A -= V T V^* A
  Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,
         (VectorsType::MaxColsAtCompileTime==1 && MatrixType::MaxColsAtCompileTime!=1)?RowMajor:ColMajor,
         VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat;
  // FIXME add .noalias() once the triangular product can work inplace
  if(forward) tmp = T.template triangularView<Upper>()           * tmp;
  else        tmp = T.template triangularView<Upper>().adjoint() * tmp;
  mat.noalias() -= V * tmp;
}

} // end namespace internal

IncompleteCholesky.h:

245
246
247
248
249
250
251
252
253
254
255
256
      m_scale(j) = RealScalar(1)/m_scale(j);
    else
      m_scale(j) = 1;

  // TODO disable scaling if not needed, i.e., if it is roughly uniform? (this will make solve() faster)

  // Scale and compute the shift for the matrix
  RealScalar mindiag = NumTraits<RealScalar>::highest();
  for (Index j = 0; j < n; j++)
  {
    for (Index k = colPtr[j]; k < colPtr[j+1]; k++)
      vals[k] *= (m_scale(j)*m_scale(rowIdx[k]));

IncompleteLUT.h:

228
229
230
231
232
233
234
235
236
237
238
239
#ifndef EIGEN_MPL2_ONLY
  // To this end, let's symmetrize the pattern and perform AMD on it.
  SparseMatrix<Scalar,ColMajor, StorageIndex> mat1 = amat;
  SparseMatrix<Scalar,ColMajor, StorageIndex> mat2 = amat.transpose();
  // FIXME for a matrix with nearly symmetric pattern, mat2+mat1 is the appropriate choice.
  //       on the other hand, for a really non-symmetric pattern, mat2*mat1 should be preferred...
  SparseMatrix<Scalar,ColMajor, StorageIndex> AtA = mat2 + mat1;
  AMDOrdering<StorageIndex> ordering;
  ordering(AtA,m_P);
  m_Pinv  = m_P.inverse(); // cache the inverse permutation
#else
  // If AMD is not available, (MPL2-only), then let's use the slower COLAMD routine.
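
For context, the ordering computed above feeds the incomplete factorization, which is typically used as a preconditioner for an iterative solver; a minimal usage sketch on a small hand-built system:

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
#include <vector>

int main()
{
  typedef Eigen::SparseMatrix<double> SpMat;
  typedef Eigen::Triplet<double> T;

  // Small tridiagonal system; in practice A comes from the application.
  std::vector<T> coeffs = { T(0, 0, 4), T(0, 1, -1),
                            T(1, 0, -1), T(1, 1, 4), T(1, 2, -1),
                            T(2, 1, -1), T(2, 2, 4) };
  SpMat A(3, 3);
  A.setFromTriplets(coeffs.begin(), coeffs.end());

  Eigen::VectorXd b = Eigen::VectorXd::Ones(3);

  // BiCGSTAB preconditioned by the incomplete LU factorization discussed above.
  Eigen::BiCGSTAB<SpMat, Eigen::IncompleteLUT<double> > solver;
  solver.compute(A);
  Eigen::VectorXd x = solver.solve(b);

  return solver.info() == Eigen::Success ? 0 : 1;
}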

FullPivLU.h:

63
64
65
66
67
68
69
70
71
72
73
74
    typedef _MatrixType MatrixType;
    typedef SolverBase<FullPivLU> Base;

    EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU)
    // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int
    enum {
      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
    };
    typedef typename internal::plain_row_type<MatrixType, StorageIndex>::type IntRowVectorType;
    typedef typename internal::plain_col_type<MatrixType, StorageIndex>::type IntColVectorType;
    typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType;

FullPivLU.h:

236
237
238
239
240
241
242
243
244
245
246
247
      * Output: \verbinclude FullPivLU_solve.out
      *
      * \sa TriangularView::solve(), kernel(), inverse()
      */
    // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion.
    template<typename Rhs>
    inline const Solve<FullPivLU, Rhs>
    solve(const MatrixBase<Rhs>& b) const
    {
      eigen_assert(m_isInitialized && "LU is not initialized.");
      return Solve<FullPivLU, Rhs>(*this, b.derived());
    }

FullPivLU.h:

596
597
598
599
600
601
602
603
604
605
606
607
  eigen_assert(m_isInitialized && "LU is not initialized.");
  const Index smalldim = (std::min)(m_lu.rows(), m_lu.cols());
  // LU
  MatrixType res(m_lu.rows(),m_lu.cols());
  // FIXME the .toDenseMatrix() should not be needed...
  res = m_lu.leftCols(smalldim)
            .template triangularView<UnitLower>().toDenseMatrix()
      * m_lu.topRows(smalldim)
            .template triangularView<Upper>().toDenseMatrix();

  // P^{-1}(LU)
  res = m_p.inverse() * res;

FullPivLU.h:

665
666
667
668
669
670
671
672
673
674
675
    // we construct a temporary trapezoid matrix m, by taking the U matrix and
    // permuting the rows and cols to bring the nonnegligible pivots to the top of
    // the main diagonal. We need that to be able to apply our triangular solvers.
    // FIXME when we get triangularView-for-rectangular-matrices, this can be simplified
    Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, MatrixType::Options,
           MaxSmallDimAtCompileTime, MatrixType::MaxColsAtCompileTime>
      m(dec().matrixLU().block(0, 0, rank(), cols));
    for(Index i = 0; i < rank(); ++i)
    {
      if(i) m.row(i).head(i).setZero();
      m.row(i).tail(cols-i) = dec().matrixLU().row(pivots.coeff(i)).tail(cols-i);

PartialPivLU.h:

79
80
81
82
83
84
85
86
87
88
89
    typedef _MatrixType MatrixType;
    typedef SolverBase<PartialPivLU> Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU)
    // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int
    enum {
      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
    };
    typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
    typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
    typedef typename MatrixType::PlainObject PlainObject;

PartialPivLU.h:

168
169
170
171
172
173
174
175
176
177
178
179
      * theoretically exists and is unique regardless of b.
      *
      * \sa TriangularView::solve(), inverse(), computeInverse()
      */
    // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion.
    template<typename Rhs>
    inline const Solve<PartialPivLU, Rhs>
    solve(const MatrixBase<Rhs>& b) const
    {
      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
      return Solve<PartialPivLU, Rhs>(*this, b.derived());
    }

PartialPivLU.h:

341
342
343
344
345
346
347
348
349
350
351
352
/** \internal This is the blocked version of fullpivlu_unblocked() */
template<typename Scalar, int StorageOrder, typename PivIndex>
struct partial_lu_impl
{
  // FIXME add a stride to Map, so that the following mapping becomes easier,
  // another option would be to create an expression being able to automatically
  // wrap any Map, Matrix, and Block expressions as a unique type, but since that's exactly
  // a Map + stride, why not add a stride to Map, and convenient ctors from a Matrix,
  // and Block.
  typedef Map<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > MapLU;
  typedef Block<MapLU, Dynamic, Dynamic> MatrixType;
  typedef Block<MatrixType,Dynamic,Dynamic> BlockType;

PartialPivLU.h:

390
391
392
393
394
395
396
397
398
399
400
401
          lu.row(k).swap(lu.row(row_of_biggest_in_col));
          ++nb_transpositions;
        }

        // FIXME shall we introduce a safe quotient expression in case 1/lu.coeff(k,k)
        // overflows but not the actual quotient?
        lu.col(k).tail(rrows) /= lu.coeff(k,k);
      }
      else if(first_zero_pivot==-1)
      {
        // the pivot is exactly zero, we record the index of the first pivot which is exactly 0,
        // and continue the factorization such that we still have A = PLU
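
The concern in the FIXME is visible with single precision: the reciprocal of a subnormal pivot overflows even though dividing by the pivot directly can stay finite (the values below are chosen only to exhibit the effect):

#include <cmath>
#include <cstdio>

int main()
{
  float pivot = 1e-45f;              // subnormal pivot
  float entry = 1e-40f;              // entry to be scaled by the pivot's reciprocal

  float recip = 1.0f / pivot;        // overflows to +inf in single precision
  float viaReciprocal = entry * recip;
  float direct = entry / pivot;      // finite (about 7e4)

  std::printf("%g vs %g\n", viaReciprocal, direct);
  return std::isinf(viaReciprocal) && std::isfinite(direct) ? 0 : 1;
}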

MetisSupport.h:

112
113
114
115
116
117
118
119
120
121
122
123
     output_error = METIS_NodeND(&m, m_indexPtr.data(), m_innerIndices.data(), NULL, NULL, perm.data(), iperm.data());

    if(output_error != METIS_OK)
    {
      //FIXME The ordering interface should define a class of possible errors
     std::cerr << "ERROR WHILE CALLING THE METIS PACKAGE \n";
     return;
    }

    // Get the fill-reducing permutation
    //NOTE:  If Ap is the permuted matrix then perm and iperm vectors are defined as follows
    // Row (column) i of Ap is the perm(i) row(column) of A, and row (column) i of A is the iperm(i) row(column) of Ap

Ordering.h:

20
21
22
23
24
25
26
27
28
29
30
31
/** \internal
  * \ingroup OrderingMethods_Module
  * \param[in] A the input non-symmetric matrix
  * \param[out] symmat the symmetric pattern A^T+A from the input matrix \a A.
  * FIXME: The values should not be considered here
  */
template<typename MatrixType>
void ordering_helper_at_plus_a(const MatrixType& A, MatrixType& symmat)
{
  MatrixType C;
  C = A.transpose(); // NOTE: Could be  costly
  for (int i = 0; i < C.rows(); i++)
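
At the user level, the structurally symmetric pattern this helper builds corresponds to A^T + A, which fill-reducing orderings such as AMD operate on; the numeric values are irrelevant, as the FIXME notes. A small sketch:

#include <Eigen/Sparse>
#include <vector>

int main()
{
  typedef Eigen::SparseMatrix<double> SpMat;
  typedef Eigen::Triplet<double> T;

  std::vector<T> coeffs = { T(0, 2, 1.0), T(3, 1, 5.0) };
  SpMat A(4, 4);
  A.setFromTriplets(coeffs.begin(), coeffs.end());

  // Structurally symmetric pattern used by fill-reducing orderings such as AMD.
  SpMat sym = SpMat(A.transpose()) + A;

  return sym.nonZeros() == 4 ? 0 : 1;
}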

ColPivHouseholderQR.h:

57
58
59
60
61
62
63
64
65
66
67
68
      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
    };
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::RealScalar RealScalar;
    // FIXME should be int
    typedef typename MatrixType::StorageIndex StorageIndex;
    typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
    typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationType;
    typedef typename internal::plain_row_type<MatrixType, Index>::type IntRowVectorType;
    typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
    typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType;
    typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType;

FullPivHouseholderQR.h:

66
67
68
69
70
71
72
73
74
75
76
77
      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
    };
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::RealScalar RealScalar;
    // FIXME should be int
    typedef typename MatrixType::StorageIndex StorageIndex;
    typedef internal::FullPivHouseholderQRMatrixQReturnType<MatrixType> MatrixQReturnType;
    typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
    typedef Matrix<StorageIndex, 1,
                   EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime,RowsAtCompileTime), RowMajor, 1,
                   EIGEN_SIZE_MIN_PREFER_FIXED(MaxColsAtCompileTime,MaxRowsAtCompileTime)> IntDiagSizeVectorType;
    typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationType;

FullPivHouseholderQR.h:

543
544
545
546
547
548
549
550
551
552
553
{
  eigen_assert(rhs.rows() == rows());
  const Index l_rank = rank();

  // FIXME introduce nonzeroPivots() and use it here. And more generally,
  // make the same improvements in this decomposition as in FullPivLU.
  if(l_rank==0)
  {
    dst.setZero();
    return;
  }

HouseholderQR.h:

53
54
55
56
57
58
59
60
61
62
63
64
      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
    };
    typedef typename MatrixType::Scalar Scalar;
    typedef typename MatrixType::RealScalar RealScalar;
    // FIXME should be int
    typedef typename MatrixType::StorageIndex StorageIndex;
    typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime, (MatrixType::Flags&RowMajorBit) ? RowMajor : ColMajor, MaxRowsAtCompileTime, MaxRowsAtCompileTime> MatrixQType;
    typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
    typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
    typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType;

    /**

BDCSVD.h:

247
248
249
250
251
252
253
254
255
256
257
  //**** step -1 - If the problem is too small, directly fall back to JacobiSVD and return
  if(matrix.cols() < m_algoswap)
  {
    // FIXME this line involves temporaries
    JacobiSVD<MatrixType> jsvd(matrix,computationOptions);
    if(computeU()) m_matrixU = jsvd.matrixU();
    if(computeV()) m_matrixV = jsvd.matrixV();
    m_singularValues = jsvd.singularValues();
    m_nonzeroSingularValues = jsvd.nonzeroSingularValues();
    m_isInitialized = true;
    return *this;
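
The JacobiSVD fallback mentioned in the FIXME is taken for small inputs (fewer columns than an internal switch threshold); from the caller's point of view both paths sit behind the same interface (usage sketch):

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 4);

  // Small problem: BDCSVD falls back to JacobiSVD internally, as the code above shows.
  Eigen::BDCSVD<Eigen::MatrixXd> svd(A, Eigen::ComputeThinU | Eigen::ComputeThinV);

  Eigen::MatrixXd reconstructed =
      svd.matrixU() * svd.singularValues().asDiagonal() * svd.matrixV().transpose();

  return reconstructed.isApprox(A) ? 0 : 1;
}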

BDCSVD.h:

265
266
267
268
269
270
271
272
273
274
275
276
  if (m_isTranspose) copy = matrix.adjoint()/scale;
  else               copy = matrix/scale;

  //**** step 1 - Bidiagonalization
  // FIXME this line involves temporaries
  internal::UpperBidiagonalization<MatrixX> bid(copy);

  //**** step 2 - Divide & Conquer
  m_naiveU.setZero();
  m_naiveV.setZero();
  // FIXME this line involves a temporary matrix
  m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();

BDCSVD.h:

271
272
273
274
275
276
277
278
279
280
281
  //**** step 2 - Divide & Conquer
  m_naiveU.setZero();
  m_naiveV.setZero();
  // FIXME this line involves a temporary matrix
  m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
  m_computed.template bottomRows<1>().setZero();
  divide(0, m_diagSize - 1, 0, 0, 0);

  //**** step 3 - Copy singular values and vectors
  for (int i=0; i<m_diagSize; i++)
  {

BDCSVD.h:

405
406
407
408
409
410
411
412
413
414
415
416
  // We use the other algorithm which is more efficient for small
  // matrices.
  if (n < m_algoswap)
  {
    // FIXME this line involves temporaries
    JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0));
    if (m_compU)
      m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();
    else
    {
      m_naiveU.row(0).segment(firstCol, n + 1).real() = b.matrixU().row(0);
      m_naiveU.row(1).segment(firstCol, n + 1).real() = b.matrixU().row(n);

BDCSVD.h:

563
564
565
566
567
568
569
570
571
572
573
574
// the first column and on the diagonal and has undergone deflation, so diagonal is in increasing
// order except for possibly the (0,0) entry. The computed SVD is stored U, singVals and V, except
// that if m_compV is false, then V is not computed. Singular values are sorted in decreasing order.
//
// TODO Opportunities for optimization: better root finding algo, better stopping criterion, better
// handling of round-off errors, be consistent in ordering
// For instance, to solve the secular equation using FMM, see http://www.stat.uchicago.edu/~lekheng/courses/302/classics/greengard-rokhlin.pdf
template <typename MatrixType>
void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
{
  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
  using std::abs;

BDCSVD.h:

1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
// page 13
// i,j >= 1, i!=j and |di - dj| < epsilon * norm2(M)
// We apply two rotations to have zj = 0;
// TODO deflation44 is still broken and not properly tested
template <typename MatrixType>
void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size)
{
  using std::abs;
  using std::sqrt;
  using std::conj;
  using std::pow;

AmbiVector.h:

189
190
191
192
193
194
195
196
197
198
199
200
    return m_buffer[i];
  else
  {
    ListEl* EIGEN_RESTRICT llElements = reinterpret_cast<ListEl*>(m_buffer);
    // TODO factorize the following code to reduce code generation
    eigen_assert(m_mode==IsSparse);
    if (m_llSize==0)
    {
      // this is the first element
      m_llStart = 0;
      m_llCurrent = 0;
      ++m_llSize;

ConservativeSparseSparseProduct.h:

85
86
87
88
89
90
91
92
93
94
95
96
      // alternative ordered insertion code:
      const Index t200 = rows/11; // 11 == (log2(200)*1.39)
      const Index t = (rows*100)/139;

      // FIXME reserve nnz non zeros
      // FIXME implement faster sorting algorithms for very small nnz
      // if the result is sparse enough => use a quick sort
      // otherwise => loop through the entire vector
      // In order to avoid performing an expensive log2 when the
      // result is clearly very sparse, we use a linear bound up to 200.
      if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)
      {

ConservativeSparseSparseProduct.h:

86
87
88
89
90
91
92
93
94
95
96
97
      const Index t200 = rows/11; // 11 == (log2(200)*1.39)
      const Index t = (rows*100)/139;

      // FIXME reserve nnz non zeros
      // FIXME implement faster sorting algorithms for very small nnz
      // if the result is sparse enough => use a quick sort
      // otherwise => loop through the entire vector
      // In order to avoid performing an expensive log2 when the
      // result is clearly very sparse, we use a linear bound up to 200.
      if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)
      {
        if(nnz>1) std::sort(indices,indices+nnz);

ConservativeSparseSparseProduct.h:

143
144
145
146
147
148
149
150
151
152
153
154
    typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime,ColMajorMatrixAux::Flags>::type ColMajorMatrix;

    // If the result is tall and thin (in the extreme case a column vector)
    // then it is faster to sort the coefficients inplace instead of transposing twice.
    // FIXME, the following heuristic is probably not very good.
    if(lhs.rows()>rhs.cols())
    {
      ColMajorMatrix resCol(lhs.rows(),rhs.cols());
      // perform sorted insertion
      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, true);
      res = resCol.markAsRValue();
    }

SparseAssign.h:

23
24
25
26
27
28
29
30
31
32
33
34
template<typename Derived>
template<typename OtherDerived>
Derived& SparseMatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
{
  // TODO use the evaluator mechanism
  other.evalTo(derived());
  return derived();
}

template<typename Derived>
template<typename OtherDerived>
inline Derived& SparseMatrixBase<Derived>::operator=(const SparseMatrixBase<OtherDerived>& other)

SparseCwiseBinaryOp.h:

28
29
30
31
32
33
34
35
36
37
38
39
//                         generic      sparse
//  4 - dense op dense     product      dense
//                         generic      dense
//
// TODO to ease the compiler's job, we could specialize product/quotient with a scalar
//      and fall back to the cwise-unary evaluator using bind1st_op and bind2nd_op.

template<typename BinaryOp, typename Lhs, typename Rhs>
class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Sparse>
  : public SparseMatrixBase<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
{
  public:

SparseDenseProduct.h:

70
71
72
73
74
75
76
77
78
79
80
81
  }

};

// FIXME: what is the purpose of the following specialization? Is it for the BlockedSparse format?
// -> let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators
// template<typename T1, typename T2/*, int _Options, typename _StrideType*/>
// struct ScalarBinaryOpTraits<T1, Ref<T2/*, _Options, _StrideType*/> >
// {
//   enum {
//     Defined = 1
//   };

SparseMatrix.h:

518
519
520
521
522
523
524
525
526
527
528
529
      */
    template<typename KeepFunc>
    void prune(const KeepFunc& keep = KeepFunc())
    {
      // TODO optimize the uncompressed mode to avoid moving and allocating the data twice
      makeCompressed();

      StorageIndex k = 0;
      for(Index j=0; j<m_outerSize; ++j)
      {
        Index previousStart = m_outerIndex[j];
        m_outerIndex[j] = k;
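
For reference, a hedged usage sketch of `prune()` with a custom keep functor: the functor receives (row, col, value) and returns true for entries to keep. The `KeepAbove` name and threshold are illustrative:

    #include <Eigen/Sparse>
    #include <cmath>

    struct KeepAbove {
      double thresh;
      bool operator()(Eigen::Index, Eigen::Index, const double& value) const
      { return std::abs(value) > thresh; }
    };

    void dropTinyEntries(Eigen::SparseMatrix<double>& A)
    {
      // As the TODO notes, this first compresses A, then filters in place.
      A.prune(KeepAbove{1e-12});
    }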

SparseMatrix.h:

1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
template<typename DupFunctor>
void SparseMatrix<Scalar,_Options,_StorageIndex>::collapseDuplicates(DupFunctor dup_func)
{
  eigen_assert(!isCompressed());
  // TODO, in practice we should be able to use m_innerNonZeros for that task
  IndexVector wi(innerSize());
  wi.fill(-1);
  StorageIndex count = 0;
  // for each inner-vector, wi[inner_index] will hold the position of first element into the index/value buffers
  for(Index j=0; j<outerSize(); ++j)
  {
    StorageIndex start   = count;

SparseMatrix.h:

1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
    SparseMatrix dest(other.rows(),other.cols());
    Eigen::Map<IndexVector> (dest.m_outerIndex,dest.outerSize()).setZero();

    // pass 1
    // FIXME the above copy could be merged with that pass
    for (Index j=0; j<otherCopy.outerSize(); ++j)
      for (typename OtherCopyEval::InnerIterator it(otherCopyEval, j); it; ++it)
        ++dest.m_outerIndex[it.index()];

    // prefix sum
    StorageIndex count = 0;
    IndexVector positions(dest.outerSize());

SparseMatrix.h:

1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
  bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0))
                && (std::size_t(m_outerIndex[outer+1]) == m_data.size());

  std::size_t startId = m_outerIndex[outer];
  // FIXME let's make sure sizeof(long int) == sizeof(std::size_t)
  std::size_t p = m_outerIndex[outer+1];
  ++m_outerIndex[outer+1];

  double reallocRatio = 1;
  if (m_data.allocatedSize()<=m_data.size())
  {
    // if there is no preallocated memory, let's reserve a minimum of 32 elements

SparseMatrixBase.h:

109
110
111
112
113
114
115
116
117
118
119
120
                     >::type AdjointReturnType;
    typedef Transpose<Derived> TransposeReturnType;
    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;

    // FIXME storage order do not match evaluator storage order
    typedef SparseMatrix<Scalar, Flags&RowMajorBit ? RowMajor : ColMajor, StorageIndex> PlainObject;

#ifndef EIGEN_PARSED_BY_DOXYGEN
    /** This is the "real scalar" type; if the \a Scalar type is already real numbers
      * (e.g. int, float or double) then \a RealScalar is just the same as \a Scalar. If
      * \a Scalar is \a std::complex<T> then RealScalar is \a T.
      *

SparseMatrixBase.h:

188
189
190
191
192
193
194
195
196
197
198
    bool isRValue() const { return m_isRValue; }
    Derived& markAsRValue() { m_isRValue = true; return derived(); }

    SparseMatrixBase() : m_isRValue(false) { /* TODO check flags */ }


    template<typename OtherDerived>
    Derived& operator=(const ReturnByValue<OtherDerived>& other);

    template<typename OtherDerived>
    inline Derived& operator=(const SparseMatrixBase<OtherDerived>& other);

SparsePermutation.h:

87
88
89
90
91
92
93
94
95
96
97
template <int ProductTag> struct product_promote_storage_type<Sparse,             PermutationStorage, ProductTag> { typedef Sparse ret; };
template <int ProductTag> struct product_promote_storage_type<PermutationStorage, Sparse,             ProductTag> { typedef Sparse ret; };

// TODO, the following two overloads are only needed to define the right temporary type through
// typename traits<permutation_sparse_matrix_product<Rhs,Lhs,OnTheRight,false> >::ReturnType
// whereas it should be correctly handled by traits<Product<> >::PlainObject

template<typename Lhs, typename Rhs, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, AliasFreeProduct>, ProductTag, PermutationShape, SparseShape>
  : public evaluator<typename permutation_matrix_product<Rhs,OnTheLeft,false,SparseShape>::ReturnType>
{

SparseRef.h:

347
348
349
350
351
352
353
354
355
356
357
358
};

namespace internal {

// FIXME shall we introduce a general evaluator_ref that we can specialize for any sparse object once, and thus remove this copy-pasta thing...

template<typename MatScalar, int MatOptions, typename MatIndex, int Options, typename StrideType>
struct evaluator<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> >
  : evaluator<SparseCompressedBase<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > >
{
  typedef evaluator<SparseCompressedBase<Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> > > Base;
  typedef Ref<SparseMatrix<MatScalar,MatOptions,MatIndex>, Options, StrideType> XprType;

SparseSelfAdjointView.h:

122
123
124
125
126
127
128
129
130
131
132
133
    template<typename DerivedU>
    SparseSelfAdjointView& rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));

    /** \returns an expression of P H P^-1 */
    // TODO implement twists in a more evaluator friendly fashion
    SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode> twistedBy(const PermutationMatrix<Dynamic,Dynamic,StorageIndex>& perm) const
    {
      return SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode>(m_matrix, perm);
    }

    template<typename SrcMatrixType,int SrcMode>
    SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct<SrcMatrixType,SrcMode>& permutedMatrix)
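
A hedged usage sketch of `twistedBy`: applying a permutation P to a symmetric matrix stored in its lower triangle and materializing P H P^-1 as a full sparse matrix (function and variable names are illustrative):

    #include <Eigen/Sparse>

    void permuteSymmetric(const Eigen::SparseMatrix<double>& H,
                          const Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int>& P,
                          Eigen::SparseMatrix<double>& PHPinv)
    {
      // Assigning the permuted self-adjoint view expands it into a full sparse matrix.
      PHPinv = H.selfadjointView<Eigen::Lower>().twistedBy(P);
    }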

SparseSelfAdjointView.h:

203
204
205
206
207
208
209
210
211
212
213
214
}

namespace internal {

// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
//      in the future selfadjoint-ness should be defined by the expression traits
//      such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
template<typename MatrixType, unsigned int Mode>
struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> >
{
  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
  typedef SparseSelfAdjointShape Shape;

SparseSelfAdjointView.h:

230
231
232
233
234
235
236
237
238
239
240
241
  {
    internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), dst);
  }

  // FIXME: the handling of += and -= in sparse matrices should be cleaned up so that the next two overloads could be reduced to:
  template<typename DestScalar,int StorageOrder,typename AssignFunc>
  static void run(SparseMatrix<DestScalar,StorageOrder,StorageIndex> &dst, const SrcXprType &src, const AssignFunc& func)
  {
    SparseMatrix<DestScalar,StorageOrder,StorageIndex> tmp(src.rows(),src.cols());
    run(tmp, src, AssignOpType());
    call_assignment_no_alias_no_transpose(dst, tmp, func);
  }

SparseSelfAdjointView.h:

260
261
262
263
264
265
266
267
268
269
270
  template<typename DestScalar>
  static void run(DynamicSparseMatrix<DestScalar,ColMajor,StorageIndex>& dst, const SrcXprType &src, const AssignOpType&/*func*/)
  {
    // TODO directly evaluate into dst;
    SparseMatrix<DestScalar,ColMajor,StorageIndex> tmp(dst.rows(),dst.cols());
    internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), tmp);
    dst = tmp;
  }
};

} // end namespace internal

SparseSelfAdjointView.h:

373
374
375
376
377
378
379
380
381
382
383
384
  }
};

// NOTE: these two overloads are needed to evaluate the sparse selfadjoint view into a full sparse matrix
// TODO: maybe the copy could be handled by generic_product_impl so that these overloads would not be needed anymore

template<typename LhsView, typename Rhs, int ProductTag>
struct product_evaluator<Product<LhsView, Rhs, DefaultProduct>, ProductTag, SparseSelfAdjointShape, SparseShape>
  : public evaluator<typename Product<typename Rhs::PlainObject, Rhs, DefaultProduct>::PlainObject>
{
  typedef Product<LhsView, Rhs, DefaultProduct> XprType;
  typedef typename XprType::PlainObject PlainObject;

SparseSelfAdjointView.h:

581
582
583
584
585
586
587
588
589
590
591
}

}

// TODO implement twists in a more evaluator friendly fashion

namespace internal {

template<typename MatrixType, int Mode>
struct traits<SparseSymmetricPermutationProduct<MatrixType,Mode> > : traits<MatrixType> {
};

SparseSparseProductWithPruning.h:

54
55
56
57
58
59
60
61
62
63
64
65
  res.reserve(estimated_nnz_prod);
  double ratioColRes = double(estimated_nnz_prod)/(double(lhs.rows())*double(rhs.cols()));
  for (Index j=0; j<cols; ++j)
  {
    // FIXME:
    //double ratioColRes = (double(rhs.innerVector(j).nonZeros()) + double(lhs.nonZeros())/double(lhs.cols()))/double(lhs.rows());
    // let's do a more accurate determination of the nnz ratio for the current column j of res
    tempVector.init(ratioColRes);
    tempVector.setZero();
    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())

SparseSparseProductWithPruning.h:

61
62
63
64
65
66
67
68
69
70
71
72
    tempVector.init(ratioColRes);
    tempVector.setZero();
    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
      tempVector.restart();
      RhsScalar x = rhsIt.value();
      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt)
      {
        tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x;
      }
    }

SparseUtil.h:

 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
  public:
    typedef SparseVector<_Scalar, ColMajor, _StorageIndex> type;
};

// TODO this seems almost identical to plain_matrix_type<T, Sparse>
template<typename T,int Rows,int Cols,int Flags> struct sparse_eval {
    typedef typename traits<T>::Scalar _Scalar;
    typedef typename traits<T>::StorageIndex _StorageIndex;
    enum { _Options = ((Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor };
  public:
    typedef SparseMatrix<_Scalar, _Options, _StorageIndex> type;
};

SparseVector.h:

184
185
186
187
188
189
190
191
192
193
194
195
      eigen_assert(i>=0 && i<m_size);

      Index startId = 0;
      Index p = Index(m_data.size()) - 1;
      // TODO smart realloc
      m_data.resize(p+2,1);

      while ( (p >= startId) && (m_data.index(p) > i) )
      {
        m_data.index(p+1) = m_data.index(p);
        m_data.value(p+1) = m_data.value(p);
        --p;

SparseView.h:

75
76
77
78
79
80
81
82
83
84
85
86
};

namespace internal {

// TODO find a way to unify the two following variants
// This is tricky because implementing an inner iterator on top of an IndexBased evaluator is
// not easy because the evaluators do not expose the sizes of the underlying expression.

template<typename ArgType>
struct unary_evaluator<SparseView<ArgType>, IteratorBased>
  : public evaluator_base<SparseView<ArgType> >
{

TriangularSolver.h:

152
153
154
155
156
157
158
159
160
161
162
163
        if (tmp!=Scalar(0)) // optimization when other is actually sparse
        {
          if(!(Mode & UnitDiag))
          {
            // TODO replace this with a binary search. Make sure the binary search is safe for partially sorted elements
            LhsIterator it(lhsEval, i);
            while(it && it.index()!=i)
              ++it;
            eigen_assert(it && it.index()==i);
            other.coeffRef(i,col) /= it.value();
          }
          LhsIterator it(lhsEval, i);
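
A hypothetical helper for the binary search the TODO suggests, assuming the inner indices of the current column form a sorted contiguous range (which is exactly the precondition the TODO says must be verified):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    // Return the offset of index j inside the sorted range [first, last);
    // the diagonal entry of a triangular factor is assumed to be present.
    template <typename IndexT>
    std::ptrdiff_t findDiagonalOffset(const IndexT* first, const IndexT* last, IndexT j)
    {
      const IndexT* pos = std::lower_bound(first, last, j);
      assert(pos != last && *pos == j && "missing diagonal entry");
      return pos - first;
    }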

TriangularSolver.h:

223
224
225
226
227
228
229
230
231
232
233
234
    res.reserve(other.nonZeros());

    for(Index col=0 ; col<other.cols() ; ++col)
    {
      // FIXME estimate number of non zeros
      tempVector.init(.99/*float(other.col(col).nonZeros())/float(other.rows())*/);
      tempVector.setZero();
      tempVector.restart();
      for (typename Rhs::InnerIterator rhsIt(other, col); rhsIt; ++rhsIt)
      {
        tempVector.coeffRef(rhsIt.index()) = rhsIt.value();
      }

TriangularSolver.h:

270
271
272
273
274
275
276
277
278
279
280
281
      }


      Index count = 0;
      // FIXME compute a reference value to filter zeros
      for (typename AmbiVector<Scalar,StorageIndex>::Iterator it(tempVector/*,1e-12*/); it; ++it)
      {
        ++ count;
//         std::cerr << "fill " << it.index() << ", " << col << "\n";
//         std::cout << it.value() << "  ";
        // FIXME use insertBack
        res.insert(it.index(), col) = it.value();

TriangularSolver.h:

276
277
278
279
280
281
282
283
284
285
286
287
      {
        ++ count;
//         std::cerr << "fill " << it.index() << ", " << col << "\n";
//         std::cout << it.value() << "  ";
        // FIXME use insertBack
        res.insert(it.index(), col) = it.value();
      }
//       std::cout << "tempVector.nonZeros() == " << int(count) << " / " << (other.rows()) << "\n";
    }
    res.finalize();
    other = res.markAsRValue();
  }

SparseLU.h:

410
411
412
413
414
415
416
417
418
419
420
421
template <typename MatrixType, typename OrderingType>
void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat)
{

  //TODO  It is possible, as in SuperLU, to compute row and column scaling vectors to equilibrate the matrix mat.

  // Firstly, copy the whole input matrix.
  m_mat = mat;

  // Compute fill-in ordering
  OrderingType ord;
  ord(m_mat,m_perm_c);
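
For context, a hedged end-to-end usage sketch of SparseLU with the default COLAMD ordering; the equilibration step mentioned in the TODO would slot in before `analyzePattern`:

    #include <Eigen/SparseLU>

    void solveWithSparseLU(const Eigen::SparseMatrix<double>& A,
                           const Eigen::VectorXd& b, Eigen::VectorXd& x)
    {
      Eigen::SparseLU<Eigen::SparseMatrix<double>, Eigen::COLAMDOrdering<int> > lu;
      lu.analyzePattern(A);  // compute the fill-reducing ordering, as above
      lu.factorize(A);       // numerical factorization
      x = lu.solve(b);
    }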

SparseLU.h:

666
667
668
669
670
671
672
673
674
675
676
677
        return;
      }

      // Update the determinant of the row permutation matrix
      // FIXME: the following test is not correct; we should probably take iperm_c into account, and pivrow is not directly the row pivot.
      if (pivrow != jj) m_detPermR = -m_detPermR;

      // Prune columns (0:jj-1) using column jj
      Base::pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);

      // Reset repfnz for this column
      for (i = 0; i < nseg; i++)

SparseLU_SupernodalMatrix.h:

23
24
25
26
27
28
29
30
31
32
33
34
 *
 * NOTE : This class corresponds to the SCformat structure in SuperLU
 *
 */
/* TODO
 * InnerIterator as for sparsematrix
 * SuperInnerIterator to iterate through all supernodes
 * Function for triangular solve
 */
template <typename _Scalar, typename _StorageIndex>
class MappedSuperNodalMatrix
{

SparseLU_SupernodalMatrix.h:

54
55
56
57
58
59
60
61
62
63
64
65
    }
    /**
     * Set appropriate pointers for the lower triangular supernodal matrix
     * This information is available at the end of the numerical factorization
     * FIXME This class will be modified such that it can be used in the course
     * of the factorization.
     */
    void setInfos(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
             IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
    {
      m_row = m;
      m_col = n;

SparseQR.h:

481
482
483
484
485
486
487
488
489
490
491
      tdot *= m_hcoeffs(curIdx);

      // Then update tval = tval - q * tau
      // FIXME: tval -= tdot * m_Q.col(curIdx) should amount to the same (need to check/add support for efficient "dense ?= sparse")
      for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)
        tval(itq.row()) -= itq.value() * tdot;

      // Detect fill-in for the current column of Q
      if(m_etree(Ridx(i)) == nonzeroCol)
      {
        for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq)

SparseQR.h:

506
507
508
509
510
511
512
513
514
515
516
    if(nonzeroCol < diagSize)
    {
      // Compute the Householder reflection that eliminates the current column
      // FIXME this step should call the Householder module.
      Scalar c0 = nzcolQ ? tval(Qidx(0)) : Scalar(0);

      // First, the squared norm of Q((col+1):m, col)
      RealScalar sqrNorm = 0.;
      for (Index itq = 1; itq < nzcolQ; ++itq) sqrNorm += numext::abs2(tval(Qidx(itq)));
      if(sqrNorm == RealScalar(0) && numext::imag(c0) == RealScalar(0))
      {

SparseQR.h:

686
687
688
689
690
691
692
693
694
695
696
697
  }
  const SparseQRType& m_qr;
};

// TODO this actually represents the adjoint of Q
template<typename SparseQRType>
struct SparseQRMatrixQTransposeReturnType
{
  explicit SparseQRMatrixQTransposeReturnType(const SparseQRType& qr) : m_qr(qr) {}
  template<typename Derived>
  SparseQR_QProduct<SparseQRType,Derived> operator*(const MatrixBase<Derived>& other)
  {

SuperLUSupport.h:

215
216
217
218
219
220
221
222
223
224
225
    res.storage.outerInd  = mat.outerIndexPtr();

    res.setScalarType<typename MatrixType::Scalar>();

    // FIXME the following is not very accurate
    if (MatrixType::Flags & Upper)
      res.Mtype = SLU_TRU;
    if (MatrixType::Flags & Lower)
      res.Mtype = SLU_TRL;

    eigen_assert(((MatrixType::Flags & SelfAdjoint)==0) && "SelfAdjoint matrix shape not supported by SuperLU");

SuperLUSupport.h:

274
275
276
277
278
279
280
281
282
283
284
285
    res.storage.outerInd  = mat.outerIndexPtr();

    res.setScalarType<typename MatrixType::Scalar>();

    // FIXME the following is not very accurate
    if (MatrixType::Flags & Upper)
      res.Mtype = SLU_TRU;
    if (MatrixType::Flags & Lower)
      res.Mtype = SLU_TRL;

    eigen_assert(((MatrixType::Flags & SelfAdjoint)==0) && "SelfAdjoint matrix shape not supported by SuperLU");
  }

SuperLUSupport.h:

638
639
640
641
642
643
644
645
646
647
648
649
  StatFree(&m_sluStat);

  m_extractedDataAreDirty = true;

  // FIXME how to better check for errors ???
  m_info = info == 0 ? Success : NumericalIssue;
  m_factorizationIsOk = true;
}

template<typename MatrixType>
template<typename Rhs,typename Dest>
void SuperLU<MatrixType>::_solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest>& x) const

SuperLUSupport.h:

961
962
963
964
965
966
967
968
969
970
971
972
                &recip_pivot_growth, &rcond,
                &m_sluStat, &info, Scalar());
  StatFree(&m_sluStat);

  // FIXME how to better check for errors ???
  m_info = info == 0 ? Success : NumericalIssue;
  m_factorizationIsOk = true;
}

#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename MatrixType>
template<typename Rhs,typename Dest>

UmfPackSupport.h:

11
12
13
14
15
16
17
18
19
20
21
#define EIGEN_UMFPACKSUPPORT_H

namespace Eigen {

/* TODO extract L, extract U, compute det, etc... */

// generic double/complex<double> wrapper functions:


inline void umfpack_defaults(double control[UMFPACK_CONTROL], double)
{ umfpack_di_defaults(control); }

ArrayCwiseBinaryOps.h:

103
104
105
106
107
108
109
110
111
112
113
114
const CwiseBinaryOp<internal::scalar_pow_op<Scalar,T>,Derived,Constant<T> > pow(const T& exponent) const;
#endif


// TODO code generating macros could be moved to Macros.h and could include generation of documentation
#define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \
template<typename OtherDerived> \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived> \
OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
{ \
  return CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived>(derived(), other.derived()); \
}\
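
For illustration (a hedged example of how the macro is meant to be used, not new library code): an invocation such as the one below generates a lazy coefficient-wise comparison operator whose functor tag is `internal::cmp_LT`:

    EIGEN_MAKE_CWISE_COMP_OP(operator<, LT)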

mpreal.h:

740
741
742
743
744
745
746
747
748
749
750
751
namespace internal{

    // Use SFINAE to restrict arithmetic operations instantiation only for numeric types
    // This is needed for smooth integration with libraries based on expression templates, like Eigen.
    // TODO: Do the same for boolean operators.
    template <typename ArgumentType> struct result_type {};

    template <> struct result_type<mpreal>              {typedef mpreal type;};
    template <> struct result_type<mpz_t>               {typedef mpreal type;};
    template <> struct result_type<mpq_t>               {typedef mpreal type;};
    template <> struct result_type<long double>         {typedef mpreal type;};
    template <> struct result_type<double>              {typedef mpreal type;};

mpreal.h:

1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
#endif

inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const
{
    // TODO: Add extended format specification (f, e, rounding mode) as is done in the output operator
    (void)b;
    (void)mode;

#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))

    std::ostringstream format;

mpreal.h:

1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
}

inline std::istream& operator>>(std::istream &is, mpreal& v)
{
    // TODO: use cout::hexfloat and other flags to set up the base
    std::string tmp;
    is >> tmp;
    mpfr_set_str(v.mpfr_ptr(), tmp.c_str(), 10, mpreal::get_default_rnd());
    return is;
}

//////////////////////////////////////////////////////////////////////////
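
A hypothetical helper for that TODO, deriving the conversion base from the stream's basefield flags instead of hard-coding base 10 (full hexfloat support would need additional MPFR-side work):

    #include <ios>
    #include <istream>

    inline int streamBase(const std::istream& is)
    {
      const std::ios_base::fmtflags f = is.flags() & std::ios_base::basefield;
      if (f == std::ios_base::hex) return 16;  // e.g. after `is >> std::hex`
      if (f == std::ios_base::oct) return 8;
      return 10;                               // default decimal
    }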

AssignmentFunctors.h:

144
145
146
147
148
149
150
151
152
153
154
155
  EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
  {
#ifdef __CUDACC__
    // FIXME is there some kind of cuda::swap?
    Scalar t=b; const_cast<Scalar&>(b)=a; a=t;
#else
    using std::swap;
    swap(a,const_cast<Scalar&>(b));
#endif
  }
};

BinaryFunctors.h:

51
52
53
54
55
56
57
58
59
60
61
62
struct functor_traits<scalar_sum_op<LhsScalar,RhsScalar> > {
  enum {
    Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2, // rough estimate!
    PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasAdd && packet_traits<RhsScalar>::HasAdd
    // TODO vectorize mixed sum
  };
};

/** \internal
  * \brief Template specialization to deprecate the summation of boolean expressions.
  * This is required to solve Bug 426.
  * \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast()

BinaryFunctors.h:

 95
 96
 97
 98
 99
100
101
102
103
104
105
106
struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
  enum {
    Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
    PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
    // TODO vectorize mixed product
  };
};

/** \internal
  * \brief Template functor to compute the conjugate product of two scalars
  *
  * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y)

UnaryFunctors.h:

753
754
755
756
757
758
759
760
761
762
763
764
  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
  {
      return Scalar( (a>Scalar(0)) - (a<Scalar(0)) );
  }
  //TODO
  //template <typename Packet>
  //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }
};
template<typename Scalar>
struct scalar_sign_op<Scalar,true> {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const

UnaryFunctors.h:

769
770
771
772
773
774
775
776
777
778
779
780
      return Scalar(0);
    aa = real_type(1)/aa;
    return Scalar(real(a)*aa, imag(a)*aa );
  }
  //TODO
  //template <typename Packet>
  //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }
};
template<typename Scalar>
struct functor_traits<scalar_sign_op<Scalar> >
{ enum {
    Cost =

GeneralBlockPanelKernel.h:

188
189
190
191
192
193
194
195
196
197
198
199
    }

    // ---- 2nd level of blocking on max(L2,L3), yields nc ----

    // TODO find a reliable way to get the actual amount of cache per core to use for 2nd level blocking, that is:
    //      actual_l2 = max(l2, l3/nb_core_sharing_l3)
    // The number below is quite conservative: it is better to underestimate the cache size than to overestimate it.
    // For instance, it corresponds to 6MB of L3 shared among 4 cores.
    #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
    const Index actual_l2 = l3;
    #else
    const Index actual_l2 = 1572864; // == 1.5 MB
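
A small sketch spelling out the heuristic described in the comment, with the 6 MB / 4 cores example that yields the hard-coded 1572864 bytes (the helper name and parameters are illustrative):

    #include <algorithm>
    #include <cstddef>

    inline std::ptrdiff_t perCoreCacheBudget(std::ptrdiff_t l2, std::ptrdiff_t l3,
                                             std::ptrdiff_t coresSharingL3)
    {
      // actual_l2 = max(l2, l3 / nb_core_sharing_l3); e.g. 6 MB / 4 = 1572864 bytes
      return std::max(l2, l3 / coresSharingL3);
    }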

GeneralBlockPanelKernel.h:

232
233
234
235
236
237
238
239
240
241
242
243
    else if(old_k==k)
    {
      // So far, no blocking at all, i.e., kc==k, and nc==n.
      // In this case, let's perform a blocking over the rows such that the packed lhs data is kept in cache L1/L2
      // TODO: part of this blocking strategy is now implemented within the kernel itself, so the L1-based heuristic here should be obsolete.
      Index problem_size = k*n*sizeof(LhsScalar);
      Index actual_lm = actual_l2;
      Index max_mc = m;
      if(problem_size<=1024)
      {
        // problem is small enough to keep in L1
        // Let's choose m such that lhs's block fit in 1/3 of L1

GeneralBlockPanelKernel.h:

310
311
312
313
314
315
316
317
318
319
320
321
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
  #define CJMADD(CJ,A,B,C,T)  C = CJ.pmadd(A,B,C);
#else

  // FIXME (a bit overkill maybe ?)

  template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
    EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
    {
      c = cj.pmadd(a,b,c);
    }
  };

GeneralBlockPanelKernel.h:

613
614
615
616
617
618
619
620
621
622
623
    ResPacketSize   = Vectorizable ? packet_traits<ResScalar>::size : 1,
    LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
    RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,

    // FIXME: should depend on NumberOfRegisters
    nr = 4,
    mr = ResPacketSize,

    LhsProgress = ResPacketSize,
    RhsProgress = 1
  };

GeneralBlockPanelKernel.h:

663
664
665
666
667
668
669
670
671
672
673
674
  }

  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
  {
    // FIXME not sure that's the best way to implement it!
    loadRhs(b+0, b0);
    loadRhs(b+1, b1);
    loadRhs(b+2, b2);
    loadRhs(b+3, b3);
  }

  // Vectorized path

GeneralBlockPanelKernel.h:

673
674
675
676
677
678
679
680
681
682
683
  // Vectorized path
  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, DoublePacketType& b0, DoublePacketType& b1)
  {
    // FIXME not sure that's the best way to implement it!
    loadRhs(b+0, b0);
    loadRhs(b+1, b1);
  }

  // Scalar path
  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsScalar& b0, RhsScalar& b1)
  {

GeneralBlockPanelKernel.h:

681
682
683
684
685
686
687
688
689
690
691
  // Scalar path
  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsScalar& b0, RhsScalar& b1)
  {
    // FIXME not sure that's the best way to implement it!
    loadRhs(b+0, b0);
    loadRhs(b+1, b1);
  }

  // nothing special here
  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
  {

GeneralBlockPanelKernel.h:

761
762
763
764
765
766
767
768
769
770
771
    RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
    ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,

    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
    // FIXME: should depend on NumberOfRegisters
    nr = 4,
    mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*ResPacketSize,

    LhsProgress = ResPacketSize,
    RhsProgress = 1
  };

GeneralMatrixMatrixTriangular_BLAS.h:

66
67
68
69
70
71
72
73
74
75
76
77
};

EIGEN_BLAS_RANKUPDATE_SPECIALIZE(double)
EIGEN_BLAS_RANKUPDATE_SPECIALIZE(float)
// TODO handle complex cases
// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(dcomplex)
// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(scomplex)

// SYRK for float/double
#define EIGEN_BLAS_RANKUPDATE_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int AStorageOrder, bool ConjugateA, int  UpLo> \
struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \

GeneralMatrixMatrixTriangular_BLAS.h:

132
133
134
135
136
137
138
139
140
141
142
143
EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
EIGEN_BLAS_RANKUPDATE_R(float,  float,  ssyrk_)
#endif

// TODO handle complex cases
// EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)
// EIGEN_BLAS_RANKUPDATE_C(scomplex, float,  float, cherk_)


} // end namespace internal

} // end namespace Eigen

GeneralMatrixVector.h:

147
148
149
150
151
152
153
154
155
156
157
    alignmentPattern = NoneAligned;
  }
  else if(LhsPacketSize > 4)
  {
    // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
    // Currently, it seems to be better to perform unaligned loads anyway
    alignmentPattern = NoneAligned;
  }
  else if (LhsPacketSize>1)
  {
  //    eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);

GeneralMatrixVector.h:

422
423
424
425
426
427
428
429
430
431
432
433
    alignmentPattern = NoneAligned;
  }
  else if(LhsPacketSize > 4)
  {
    // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
    alignmentPattern = NoneAligned;
  }
  else if (LhsPacketSize>1)
  {
  //    eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0  || depth<LhsPacketSize);

    while (skipRows<LhsPacketSize &&

GeneralMatrixVector.h:

462
463
464
465
466
467
468
469
470
471
  Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
  for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
  {
    // FIXME: what is the purpose of this EIGEN_ALIGN_DEFAULT ??
    EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
    ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);

    // this helps the compiler generating good binary code
    const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0),    lhs1 = lhs.getVectorMapper(i+offset1, 0),
                     lhs2 = lhs.getVectorMapper(i+2, 0),    lhs3 = lhs.getVectorMapper(i+offset3, 0);

GeneralMatrixVector.h:

477
478
479
480
481
482
483
484
485
486
487
      ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)),
                ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0));

      // process initial unaligned coeffs
      // FIXME this loop gets vectorized by the compiler!
      for (Index j=0; j<alignedStart; ++j)
      {
        RhsScalar b = rhs(j, 0);
        tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
        tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
      }

GeneralMatrixVector.h:

552
553
554
555
556
557
558
559
560
561
562
563
      }
    } // end explicit vectorization

    // process remaining coeffs (or all if no explicit vectorization)
    // FIXME this loop gets vectorized by the compiler!
    for (Index j=alignedSize; j<depth; ++j)
    {
      RhsScalar b = rhs(j, 0);
      tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
      tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
    }
    res[i*resIncr]            += alpha*tmp0;

GeneralMatrixVector.h:

576
577
578
579
580
581
582
583
584
585
586
587
      EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
      ResPacket ptmp0 = pset1<ResPacket>(tmp0);
      const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
      // process first unaligned result's coeffs
      // FIXME this loop gets vectorized by the compiler!
      for (Index j=0; j<alignedStart; ++j)
        tmp0 += cj.pmul(lhs0(j), rhs(j, 0));

      if (alignedSize>alignedStart)
      {
        // process aligned rhs coeffs
        if (lhs0.template aligned<LhsPacket>(alignedStart))

GeneralMatrixVector.h:

593
594
595
596
597
598
599
600
601
602
603
604
        tmp0 += predux(ptmp0);
      }

      // process remaining scalars
      // FIXME this loop gets vectorized by the compiler!
      for (Index j=alignedSize; j<depth; ++j)
        tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
      res[i*resIncr] += alpha*tmp0;
    }
    if (skipRows)
    {
      start = 0;

Parallelizer.h:

84
85
86
87
88
89
90
91
92
93
94
template<bool Condition, typename Functor, typename Index>
void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
{
  // TODO when EIGEN_USE_BLAS is defined,
  // we should still enable OMP for other scalar types
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
  // FIXME the transpose variable is only needed to properly split
  // the matrix product when multithreading is enabled. This is a temporary
  // fix to support row-major destination matrices. This whole
  // parallelizer mechanism has to be redesigned anyway.
  EIGEN_UNUSED_VARIABLE(depth);

Parallelizer.h:

87
88
89
90
91
92
93
94
95
96
97
98
{
  // TODO when EIGEN_USE_BLAS is defined,
  // we should still enable OMP for other scalar types
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
  // FIXME the transpose variable is only needed to properly split
  // the matrix product when multithreading is enabled. This is a temporary
  // fix to support row-major destination matrices. This whole
  // parallelizer mechanism has to be redesigned anyway.
  EIGEN_UNUSED_VARIABLE(depth);
  EIGEN_UNUSED_VARIABLE(transpose);
  func(0,rows, 0,cols);
#else

Parallelizer.h:

118
119
120
121
122
123
124
125
126
127
128
129
  Index threads = std::min<Index>(nbThreads(), pb_max_threads);

  // if multi-threading is explicitly disabled, not useful, or if we already are in a parallel session,
  // then abort multi-threading
  // FIXME omp_get_num_threads()>1 only works for OpenMP; what if the user does not use OpenMP?
  if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
    return func(0,rows, 0,cols);

  Eigen::initParallel();
  func.initParallelSession(threads);

  if(transpose)

TriangularMatrixMatrix_BLAS.h:

108
109
110
111
112
113
114
115
116
117
118
119
\
/* Non-square case - doesn't fit BLAS ?TRMM. Fall back to the default triangular product or call BLAS ?GEMM */ \
   if (rows != depth) { \
\
     /* FIXME handle mkl_domain_get_max_threads */ \
     /*int nthr = mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS);*/ int nthr = 1;\
\
     if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
     /* Most likely no benefit to call TRMM or GEMM from BLAS */ \
       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \

TriangularMatrixVector.h:

198
199
200
201
202
203
204
205
206
207
208
209
} // end namespace internal

namespace internal {

// TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same.

template<int Mode> struct trmv_selector<Mode,ColMajor>
{
  template<typename Lhs, typename Rhs, typename Dest>
  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
    typedef typename Lhs::Scalar      LhsScalar;

TriangularMatrixVector.h:

225
226
227
228
229
230
231
232
233
234
235
236
    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
    ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on the other hand, it is good for the cache to pack the vector anyway...
      EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
      MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
    };

    gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;

TriangularSolverMatrix.h:

114
115
116
117
118
119
120
121
122
123
124
125
          Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
          // tr solve
          for (Index k=0; k<actualPanelWidth; ++k)
          {
            // TODO write a small kernel handling this (can be shared with trsv)
            Index i  = IsLower ? k2+k1+k : k2-k1-k-1;
            Index rs = actualPanelWidth - k - 1; // remaining size
            Index s  = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1)
                                                 :  IsLower ? i+1 : i-rs;

            Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
            for (Index j=j2; j<j2+actual_cols; ++j)

Constants.h:

251
252
253
254
255
256
257
258
259
260
261
262
};

/** \ingroup enums
 * Enum used by DenseBase::corner() in Eigen2 compatibility mode. */
// FIXME after the corner() API change, this was not needed anymore, except by AlignedBox
// TODO: find out what to do with that. Adapt the AlignedBox API ?
enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };

/** \ingroup enums
  * Enum containing possible values for the \p Direction parameter of
  * Reverse, PartialReduxExpr and VectorwiseOp. */
enum DirectionType {

Constants.h:

252
253
254
255
256
257
258
259
260
261
262
/** \ingroup enums
 * Enum used by DenseBase::corner() in Eigen2 compatibility mode. */
// FIXME after the corner() API change, this was not needed anymore, except by AlignedBox
// TODO: find out what to do with that. Adapt the AlignedBox API ?
enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };

/** \ingroup enums
  * Enum containing possible values for the \p Direction parameter of
  * Reverse, PartialReduxExpr and VectorwiseOp. */
enum DirectionType {
  /** For Reverse, all columns are reversed;

Macros.h:

133
134
135
136
137
138
139
140
141
142
  #define EIGEN_GNUC_AT_MOST(x,y)  0
  #define EIGEN_GNUC_AT(x,y)       0
#endif

// FIXME: could probably be removed as we do not support gcc 3.x anymore
#if EIGEN_COMP_GNUC && (__GNUC__ <= 3)
#define EIGEN_GCC3_OR_OLDER 1
#else
#define EIGEN_GCC3_OR_OLDER 0
#endif

Macros.h:

497
498
499
500
501
502
503
504
505
506
507
// EIGEN_ALWAYS_INLINE is the strongest: it has the effect of making the function inline and adding every possible
// attribute to maximize inlining. This should only be used when really necessary: in particular,
// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
// FIXME with the always_inline attribute,
// gcc 3.4.x and 4.1 reports the following compilation error:
//   Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
//    : function body not available
//   See also bug 1367
#if EIGEN_GNUC_AT_LEAST(4,2)
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
#else

Macros.h:

652
653
654
655
656
657
658
659
660
661
662
663
  #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#elif EIGEN_COMP_MSVC
  #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
#elif EIGEN_COMP_SUNCC
  // FIXME not sure about this one:
  #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#else
  #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
#endif

// If the user explicitly disable vectorization, then we also disable alignment
#if defined(EIGEN_DONT_VECTORIZE)

Macros.h:

865
866
867
868
869
870
871
872
873
874
875
876
  using Base::derived; \
  using Base::const_cast_derived;


// FIXME Maybe the EIGEN_DENSE_PUBLIC_INTERFACE could be removed as importing PacketScalar is rarely needed
#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
  EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
  typedef typename Base::PacketScalar PacketScalar;


#define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
#define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)

Meta.h:

454
455
456
457
458
459
460
461
462
463
464
{
  enum { Defined = 0 };
};

// FIXME quick workaround around current limitation of result_of
// template<typename Scalar, typename ArgType0, typename ArgType1>
// struct result_of<scalar_product_op<Scalar>(ArgType0,ArgType1)> {
// typedef typename scalar_product_traits<typename remove_all<ArgType0>::type, typename remove_all<ArgType1>::type>::ReturnType type;
// };

} // end namespace internal

StaticAssert.h:

210
211
212
213
214
215
216
217
218
                                            >::value), \
                          YOU_CANNOT_MIX_ARRAYS_AND_MATRICES)

// Check that a cost value is positive, and that it stays within a reasonable range
// TODO this check could be enabled for internal debugging only
#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \
      EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE);

#endif // EIGEN_STATIC_ASSERT_H

XprHelper.h:

11
12
13
14
15
16
17
18
19
20
21
22
#ifndef EIGEN_XPRHELPER_H
#define EIGEN_XPRHELPER_H

// just a workaround because GCC seems to not really like empty structs
// FIXME: gcc 4.3 generates bad code when strict-aliasing is enabled
// so currently we simply disable this optimization for gcc 4.3
#if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3)
  #define EIGEN_EMPTY_STRUCT_CTOR(X) \
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {}
#else
  #define EIGEN_EMPTY_STRUCT_CTOR(X)

XprHelper.h:

251
252
253
254
255
256
257
258
259
260
261
262
class compute_matrix_flags
{
    enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 };
  public:
    // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<>
    // and then propagate this information to the evaluator's flags.
    // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage.
    enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
};

template<int _Rows, int _Cols> struct size_at_compile_time
{

AutoDiffVector.h:

59
60
61
62
63
64
65
66
67
68
69
    const CoeffType coeffRef(Index i) const { return CoeffType(m_values[i], m_jacobian.col(i)); }

    Index size() const { return m_values.size(); }

    // FIXME here we could return an expression of the sum
    Scalar sum() const { /*std::cerr << "sum \n\n";*/ /*std::cerr << m_jacobian.rowwise().sum() << "\n\n";*/ return Scalar(m_values.sum(), m_jacobian.rowwise().sum()); }


    inline AutoDiffVector(const ValueType& values, const JacobianType& jac)
      : m_values(values), m_jacobian(jac)
    {}

EulerAngles.h:

294
295
296
297
298
299
300
301
302
303
304
305
      }

      /*EulerAngles& fromQuaternion(const QuaternionType& q)
      {
        // TODO: Implement it in a faster way for quaternions
        // According to http://www.euclideanspace.com/maths/geometry/rotations/conversions/quaternionToEuler/
        //  we can compute only the needed matrix cells and then convert to euler angles. (see ZYX example below)
        // Currently we compute all matrix cells from quaternion.

        // Special case only for ZYX
        //Scalar y2 = q.y() * q.y();
        //m_angles[0] = std::atan2(2*(q.w()*q.z() + q.x()*q.y()), (1 - 2*(y2 + q.z()*q.z())));

EulerAngles.h:

315
316
317
318
319
320
321
322
323
324
325
326
        System::CalcEulerAngles(*this, m);
        return *this;
      }

      // TODO: Assign and construct from another EulerAngles (with different system)

      /** Set \c *this from a rotation. */
      template<typename Derived>
      EulerAngles& operator=(const RotationBase<Derived, 3>& rot) {
        System::CalcEulerAngles(*this, rot.toRotationMatrix());
        return *this;
      }

EulerAngles.h:

324
325
326
327
328
329
330
331
332
333
334
        System::CalcEulerAngles(*this, rot.toRotationMatrix());
        return *this;
      }

      // TODO: Support isApprox function

      /** \returns an equivalent 3x3 rotation matrix. */
      Matrix3 toRotationMatrix() const
      {
        return static_cast<QuaternionType>(*this).toRotationMatrix();
      }

EulerSystem.h:

17
18
19
20
21
22
23
24
25
26
27
28
  class EulerAngles;

  namespace internal
  {
    // TODO: Check whether this already exists elsewhere in the API
    template <int Num, bool IsPositive = (Num > 0)>
    struct Abs
    {
      enum { value = Num };
    };

    template <int Num>

EulerSystem.h:

174
175
176
177
178
179
180
181
182
183
184
      J = (AlphaAxisAbs - 1 + 1 + IsOdd)%3,
      K = (AlphaAxisAbs - 1 + 2 - IsOdd)%3
    };

    // TODO: Get @mat parameter in form that avoids double evaluation.
    template <typename Derived>
    static void CalcEulerAngles_imp(Matrix<typename MatrixBase<Derived>::Scalar, 3, 1>& res, const MatrixBase<Derived>& mat, internal::true_type /*isTaitBryan*/)
    {
      using std::atan2;
      using std::sin;
      using std::cos;

ei_kissfft_impl.h:

387
388
389
390
391
392
393
394
395
396
397
  inline
    PlanData & get_plan(int nfft, bool inverse)
    {
      // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles
      PlanData & pd = m_plans[ PlanKey(nfft,inverse) ];
      if ( pd.m_twiddles.size() == 0 ) {
        pd.make_twiddles(nfft,inverse);
        pd.factorize(nfft);
      }
      return pd;
    }

ConstrainedConjGrad.h:

48
49
50
51
52
53
54
55
56
57
58
59
{
  // possible optimization: copy the row, precompute C * trans(C).
  typedef typename CMatrix::Scalar Scalar;
  typedef typename CMatrix::Index Index;
  // FIXME use sparse vectors ?
  typedef Matrix<Scalar,Dynamic,1> TmpVec;

  Index rows = C.rows(), cols = C.cols();

  TmpVec d(rows), e(rows), l(cols), p(rows), q(rows), r(rows);
  Scalar rho, rho_1, alpha;
  d.setZero();

ConstrainedConjGrad.h:

82
83
84
85
86
87
88
89
90
91
92
93
      p = (rho/rho_1) * p + r;
    }

    l = C.transpose() * e; // l is the i-th row of CINV
    // FIXME add a generic "prune/filter" expression converting both dense and sparse objects to sparse
    for (Index j=0; j<l.size(); ++j)
      if (l[j]<1e-15)
    tripletList.push_back(T(i,j,l(j)));


    d[i] = 0.0;
  }

ConstrainedConjGrad.h:

144
145
146
147
148
149
150
151
152
153
154
          transition = true;
        }
        Scalar bb = CINV.row(i).dot(z);
        if (bb > 0.0)
          // FIXME: we should allow that: z += -bb * C.row(i);
          for (typename CMatrix::InnerIterator it(C,i); it; ++it)
            z.coeffRef(it.index()) -= bb*it.value();
      }
      else
        satured[i] = false;
    }

DGMRES.h:

32
33
34
35
36
37
38
39
40
41
42
43
  * \param perm gives the sorted sequence on output. Must be initialized with 0..n-1
  * \param ncut Put  the ncut smallest elements at the end of the vector
  * WARNING This is an expensive sort, so it should be used only
  * for small vectors
  * TODO Use modified QuickSplit or std::nth_element to get the smallest values
  */
template <typename VectorType, typename IndexType>
void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType::Scalar& ncut)
{
  eigen_assert(vec.size() == perm.size());
  bool flag;
  for (Index k  = 0; k < ncut; k++)
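
A hedged sketch of the `std::nth_element` alternative the TODO suggests: it places the ncut smallest values of `vec` at the tail of the permutation in linear time. The helper below is illustrative (it uses a plain `std::vector<int>` permutation rather than the `IndexType` above):

    #include <algorithm>
    #include <numeric>
    #include <vector>

    template <typename VectorType>
    void partialSortPermutation(const VectorType& vec, std::vector<int>& perm, int ncut)
    {
      perm.resize(vec.size());
      std::iota(perm.begin(), perm.end(), 0);  // perm = 0..n-1
      // After this call the last ncut entries of perm index the ncut smallest values.
      std::nth_element(perm.begin(), perm.end() - ncut, perm.end(),
                       [&vec](int i, int j) { return vec(i) > vec(j); });
    }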

DGMRES.h:

322
323
324
325
326
327
328
329
330
331
332
333
    m_V.col(it+1) = tv1/coef;
    m_H(it+1, it) = coef;
//     m_Hes(it+1,it) = coef;

    // FIXME Check for happy breakdown

    // Update Hessenberg matrix with Givens rotations
    for (Index i = 1; i <= it; ++i)
    {
      m_H.col(it).applyOnTheLeft(i-1,i,gr[i-1].adjoint());
    }
    // Compute the new plane rotation

DGMRES.h:

350
351
352
353
354
355
356
357
358
359
360
361
  }

  // Compute the new coefficients by solving the least square problem
//   it++;
  //FIXME  Check first if the matrix is singular ... zero diagonal
  DenseVector nrs(m_restart);
  nrs = m_H.topLeftCorner(it,it).template triangularView<Upper>().solve(g.head(it));

  // Form the new solution
  if (m_isDeflInitialized)
  {
    tv1 = m_V.leftCols(it) * nrs;

DGMRES.h:

493
494
495
496
497
498
499
500
501
502
503
  // Factorize m_T into m_luT
  m_luT.compute(m_T.topLeftCorner(m_r, m_r));

  //FIXME Check if the factorization was correctly done (nonsingular matrix)
  m_isDeflInitialized = true;
  return 0;
}
template<typename _MatrixType, typename _Preconditioner>
template<typename RhsType, typename DestType>
Index DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x, DestType &y) const
{

GMRES.h:

106
107
108
109
110
111
112
113
114
115
116
    v = VectorType::Unit(m, k - 1);

    // apply Householder reflections H_{1} ... H_{k-1} to v
    // TODO: use a HouseholderSequence
    for (Index i = k - 1; i >= 0; --i) {
      v.tail(m - i).applyHouseholderOnTheLeft(H.col(i).tail(m - i - 1), tau.coeffRef(i), workspace.data());
    }

    // apply matrix M to v:  v = mat * v;
    t.noalias() = mat * v;
    v = precond.solve(t);
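
A hypothetical sketch of the HouseholderSequence route the TODO mentions: the essential reflector parts stored below the diagonal of the first columns of `H`, together with the coefficients in `tau`, are exactly the data `householderSequence` expects, so the loop could in principle collapse into a single application (ordering/adjoint details would still need to be verified against the loop above):

    #include <Eigen/Dense>

    template <typename MatH, typename VecTau, typename Vec>
    void applyFirstReflectors(const MatH& H, const VecTau& tau, Eigen::Index k, Vec& v)
    {
      // v <- H_0 * H_1 * ... * H_{k-1} * v, applied as one Householder sequence.
      v.applyOnTheLeft(Eigen::householderSequence(H.leftCols(k), tau.head(k)));
    }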

GMRES.h:

116
117
118
119
120
121
122
123
124
125
126
127
    t.noalias() = mat * v;
    v = precond.solve(t);

    // apply Householder reflections H_{k-1} ... H_{1} to v
    // TODO: use a HouseholderSequence
    for (Index i = 0; i < k; ++i) {
      v.tail(m - i).applyHouseholderOnTheLeft(H.col(i).tail(m - i - 1), tau.coeffRef(i), workspace.data());
    }

    if (v.tail(m - k).norm() != 0.0)
    {
      if (k <= restart)

Scaling.h:

65
66
67
68
69
70
71
72
73
74
75
    /**
     * Compute the left and right diagonal matrices to scale the input matrix @p mat
     *
     * FIXME This algorithm will be modified such that the diagonal elements are permuted on the diagonal.
     *
     * \sa LeftScaling() RightScaling()
     */
    void compute (const MatrixType& mat)
    {
      using std::abs;
      int m = mat.rows();

KroneckerTensorProduct.h:

163
164
165
166
167
168
169
170
171
172
173
174
  typedef Eigen::InnerIterator<Rhs1Cleaned> RhsInnerIterator;

  // compute number of non-zeros per innervectors of dst
  {
    // TODO VectorXi is not necessarily big enough!
    VectorXi nnzA = VectorXi::Zero(Dest::IsRowMajor ? m_A.rows() : m_A.cols());
    for (Index kA=0; kA < m_A.outerSize(); ++kA)
      for (LhsInnerIterator itA(lhs1,kA); itA; ++itA)
        nnzA(Dest::IsRowMajor ? itA.row() : itA.col())++;

    VectorXi nnzB = VectorXi::Zero(Dest::IsRowMajor ? m_B.rows() : m_B.cols());
    for (Index kB=0; kB < m_B.outerSize(); ++kB)

LMpar.h:

60
61
62
63
64
65
66
67
68
69
70
71
    //    const Index rank = qr.nonzeroPivots(); // exactly double(0.)
    const Index rank = qr.rank(); // use a threshold
    wa1 = qtb;
    wa1.tail(n-rank).setZero();
    //FIXME There is no solve in place for sparse triangularView
    wa1.head(rank) = s.topLeftCorner(rank,rank).template triangularView<Upper>().solve(qtb.head(rank));

    x = qr.colsPermutation()*wa1;

    /* initialize the iteration counter. */
    /* evaluate the function at the origin, and test */
    /* for acceptance of the gauss-newton direction. */

LevenbergMarquardt.h:

298
299
300
301
302
303
304
305
306
307
    m_wa1.resize(n); m_wa2.resize(n); m_wa3.resize(n);
    m_wa4.resize(m);
    m_fvec.resize(m);
    //FIXME Sparse Case : Allocate space for the jacobian
    m_fjac.resize(m, n);
//     m_fjac.reserve(VectorXi::Constant(n,5)); // FIXME Find a better alternative
    if (!m_useExternalScaling)
        m_diag.resize(n);
    eigen_assert( (!m_useExternalScaling || m_diag.size()==n) && "When m_useExternalScaling is set, the caller must provide a valid 'm_diag'");
    m_qtf.resize(n);

MatrixFunction.h:

62
63
64
65
66
67
68
69
70
71
72
template <typename MatrixType>
MatrixType MatrixFunctionAtomic<MatrixType>::compute(const MatrixType& A)
{
  // TODO: Use that A is upper triangular
  typedef typename NumTraits<Scalar>::Real RealScalar;
  typedef typename MatrixType::Index Index;
  Index rows = A.rows();
  Scalar avgEival = A.trace() / Scalar(RealScalar(rows));
  MatrixType Ashifted = A - avgEival * MatrixType::Identity(rows, rows);
  RealScalar mu = matrix_function_compute_mu(Ashifted);
  MatrixType F = m_f(avgEival, 0) * MatrixType::Identity(rows, rows);

MatrixPower.h:

26
27
28
29
30
31
32
33
34
35
36
37
 * should not be changed in the meantime). It is the return type of
 * MatrixPower::operator() and related functions and most of the
 * time this is the only way it is used.
 */
/* TODO This class is only used by MatrixPower, so it should be nested
 * into MatrixPower, like MatrixPower::ReturnValue. However, my
 * compiler complained about unused template parameter in the
 * following declaration in namespace internal.
 *
 * template<typename MatrixType>
 * struct traits<MatrixPower<MatrixType>::ReturnValue>;
 */

MatrixSquareRoot.h:

18
19
20
21
22
23
24
25
26
27
28
29
// post: sqrtT.block(i,i,2,2) is square root of T.block(i,i,2,2)
template <typename MatrixType, typename ResultType>
void matrix_sqrt_quasi_triangular_2x2_diagonal_block(const MatrixType& T, typename MatrixType::Index i, ResultType& sqrtT)
{
  // TODO: This case (2-by-2 blocks with complex conjugate eigenvalues) is probably hidden somewhere
  //       in EigenSolver. If we expose it, we could call it directly from here.
  typedef typename traits<MatrixType>::Scalar Scalar;
  Matrix<Scalar,2,2> block = T.template block<2,2>(i,i);
  EigenSolver<Matrix<Scalar,2,2> > es(block);
  sqrtT.template block<2,2>(i,i)
    = (es.eigenvectors() * es.eigenvalues().cwiseSqrt().asDiagonal() * es.eigenvectors().inverse()).real();
}

HybridNonLinearSolver.h:

64
65
66
67
68
69
70
71
72
73
74
        Scalar epsfcn;
    };
    typedef Matrix< Scalar, Dynamic, 1 > FVectorType;
    typedef Matrix< Scalar, Dynamic, Dynamic > JacobianType;
    /* TODO: if eigen provides a triangular storage, use it here */
    typedef Matrix< Scalar, Dynamic, Dynamic > UpperTriangularType;

    HybridNonLinearSolverSpace::Status hybrj1(
            FVectorType  &x,
            const Scalar tol = std::sqrt(NumTraits<Scalar>::epsilon())
            );

LevenbergMarquardt.h:

465
466
467
468
469
470
471
472
473
474
475
476
    }
    permutation.setIdentity(n);
    if (sing) {
        wa2 = fjac.colwise().blueNorm();
        // TODO We have no unit test covering this code path, do not modify
        // until it is carefully tested
        ColPivHouseholderQR<JacobianType> qrfac(fjac);
        fjac = qrfac.matrixQR();
        wa1 = fjac.diagonal();
        fjac.diagonal() = qrfac.hCoeffs();
        permutation = qrfac.colsPermutation();
        // TODO : avoid this:

LevenbergMarquardt.h:

472
473
474
475
476
477
478
479
480
481
482
483
        fjac = qrfac.matrixQR();
        wa1 = fjac.diagonal();
        fjac.diagonal() = qrfac.hCoeffs();
        permutation = qrfac.colsPermutation();
        // TODO : avoid this:
        for(Index ii=0; ii< fjac.cols(); ii++) fjac.col(ii).segment(ii+1, fjac.rows()-ii-1) *= fjac(ii,ii); // rescale vectors

        for (j = 0; j < n; ++j) {
            if (fjac(j,j) != 0.) {
                sum = 0.;
                for (i = j; i < n; ++i)
                    sum += fjac(i,j) * qtf[i];

dogleg.h:

47
48
49
50
51
52
53
54
55
56
57
58
    qnorm = diag.cwiseProduct(x).stableNorm();
    if (qnorm <= delta)
        return;

    // TODO : this path is not tested by Eigen unit tests

    /* the gauss-newton direction is not acceptable. */
    /* next, calculate the scaled gradient direction. */

    wa1.fill(0.);
    for (j = 0; j < n; ++j) {
        wa1.tail(n-j) += qrfac.row(j).tail(n-j) * qtb[j];

dogleg.h:

69
70
71
72
73
74
75
76
77
78
79
    /* calculate the point along the scaled gradient */
    /* at which the quadratic is minimized. */
    wa1.array() /= (diag*gnorm).array();
    // TODO : once unit tests cover this part:
    // wa2 = qrfac.template triangularView<Upper>() * wa1;
    for (j = 0; j < n; ++j) {
        sum = 0.;
        for (i = j; i < n; ++i) {
            sum += qrfac(j,i) * wa1[i];
        }
        wa2[j] = sum;

qrsolv.h:

 2
 3
 4
 5
 6
 7
 8
 9
10
11
namespace internal {

// TODO : once qrsolv2 is removed, use ColPivHouseholderQR or PermutationMatrix instead of ipvt
template <typename Scalar>
void qrsolv(
        Matrix< Scalar, Dynamic, Dynamic > &s,
        // TODO : use a PermutationMatrix once lmpar is removed:
        const VectorXi &ipvt,
        const Matrix< Scalar, Dynamic, 1 >  &diag,
        const Matrix< Scalar, Dynamic, 1 >  &qtb,

qrsolv.h:

 5
 6
 7
 8
 9
10
11
12
13
14
15
16
// TODO : once qrsolv2 is removed, use ColPivHouseholderQR or PermutationMatrix instead of ipvt
template <typename Scalar>
void qrsolv(
        Matrix< Scalar, Dynamic, Dynamic > &s,
        // TODO : use a PermutationMatrix once lmpar is removed:
        const VectorXi &ipvt,
        const Matrix< Scalar, Dynamic, 1 >  &diag,
        const Matrix< Scalar, Dynamic, 1 >  &qtb,
        Matrix< Scalar, Dynamic, 1 >  &x,
        Matrix< Scalar, Dynamic, 1 >  &sdiag)

{
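
The two TODOs above want the raw index vector ipvt replaced by a PermutationMatrix. A hedged sketch of how an index vector maps onto that class; the names are illustrative, not the qrsolv internals:

// Sketch: wrapping an explicit index vector like ipvt in a PermutationMatrix,
// which is what the TODO proposes once the legacy callers are gone.
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::VectorXi ipvt(3);
  ipvt << 2, 0, 1;  // column j of the permuted matrix is column ipvt[j]

  Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> P(ipvt);

  Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3);
  Eigen::MatrixXd AP = A * P;  // permute columns without manual indexing
  std::cout << (AP.col(0) - A.col(ipvt[0])).norm() << std::endl;  // ~0
}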

r1mpyq.h:

 2
 3
 4
 5
 6
 7
 8
 9
10
11
namespace internal {

// TODO : move this to GivensQR once there's such a thing in Eigen

template <typename Scalar>
void r1mpyq(DenseIndex m, DenseIndex n, Scalar *a, const std::vector<JacobiRotation<Scalar> > &v_givens, const std::vector<JacobiRotation<Scalar> > &w_givens)
{
    typedef DenseIndex Index;

    /*     apply the first set of givens rotations to a. */
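
r1mpyq applies sequences of Givens rotations to a raw array. A minimal sketch of the Eigen-native way to do the same on a dense matrix, which is roughly the direction the "move this to GivensQR" TODO points; the names here are illustrative only:

// Sketch: applying a JacobiRotation (Givens rotation) to two columns of a
// dense matrix with applyOnTheRight, instead of looping over raw pointers.
#include <Eigen/Dense>
#include <Eigen/Jacobi>
#include <iostream>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3);
  Eigen::JacobiRotation<double> G;
  G.makeGivens(A(0, 0), A(0, 1));  // rotation built from two entries of row 0
  A.applyOnTheRight(0, 1, G);      // mix columns 0 and 1 by that rotation
  std::cout << A << std::endl;
}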

NumericalDiff.h:

71
72
73
74
75
76
77
78
79
80
81
82
        const typename InputType::Index n = _x.size();
        const Scalar eps = sqrt(((std::max)(epsfcn,NumTraits<Scalar>::epsilon() )));
        ValueType val1, val2;
        InputType x = _x;
        // TODO : we should do this only if the size is not already known
        val1.resize(Functor::values());
        val2.resize(Functor::values());

        // initialization
        switch(mode) {
            case Forward:
                // compute f(x)
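
NumericalDiff's Forward mode approximates each derivative with a one-sided difference, using a step derived from the square root of the machine (or user-supplied) epsilon, as in the eps line above. A scalar sketch of that rule:

// Scalar sketch of the forward-difference rule used by NumericalDiff:
// f'(x) ~ (f(x + h) - f(x)) / h with h ~ sqrt(machine epsilon), scaled to x.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <limits>

static double f(double x) { return std::sin(x) * x; }

int main() {
  const double x = 1.3;
  const double eps = std::sqrt(std::numeric_limits<double>::epsilon());
  const double h = eps * std::max(std::abs(x), 1.0);  // step scaled to x
  const double approx = (f(x + h) - f(x)) / h;
  const double exact = std::sin(x) + x * std::cos(x);
  std::cout << approx << " vs " << exact << std::endl;
}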

SkylineInplaceLU.h:

335
336
337
338
339
340
341
342
343
        while (uIt) {
            x->coeffRef(uIt.row()) -= x_col * uIt.value();
            //TODO : introduce --operator
            uIt += -1;
        }


    }
    x->coeffRef(0) = x->coeff(0) / m_lu.coeffDiag(0);

SkylineMatrix.h:

579
580
581
582
583
584
585
586
587
588
589
590
        m_data.squeeze();
    }

    void prune(Scalar reference, RealScalar epsilon = dummy_precision<RealScalar > ()) {
        //TODO
    }

    /** Resizes the matrix to a \a rows x \a cols matrix and initializes it to zero
     * \sa resizeNonZeros(Index), reserve(), setZero()
     */
    void resize(size_t rows, size_t cols) {
        const Index diagSize = rows > cols ? cols : rows;
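
Both Skyline prune() stubs (here and in SkylineStorage.h below) are empty. A sketch of the semantics they would presumably follow, mirroring SparseMatrix::prune(reference, epsilon) and shown on a plain std::vector stand-in for the stored values:

// Sketch of pruning: drop every stored value v with
// |v| <= epsilon * |reference|, as SparseMatrix::prune does.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

int main() {
  std::vector<double> stored = {1e-12, 0.5, -3e-10, 2.0};
  const double reference = 1.0, epsilon = 1e-9;
  stored.erase(std::remove_if(stored.begin(), stored.end(),
                              [&](double v) {
                                return std::abs(v) <= epsilon * std::abs(reference);
                              }),
               stored.end());
  std::cout << stored.size() << std::endl;  // 2 values survive
}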

SkylineMatrix.h:

673
674
675
676
677
678
679
680
681
682
683
    template<typename OtherDerived>
            inline SkylineMatrix & operator=(const SkylineMatrixBase<OtherDerived>& other) {
        const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
        if (needToTranspose) {
            //         TODO
            //            return *this;
        } else {
            // there is no special optimization
            return SkylineMatrixBase<SkylineMatrix>::operator=(other.derived());
        }
    }

SkylineMatrixBase.h:

144
145
146
147
148
149
150
151
152
153
154
        return derived();
    }

    SkylineMatrixBase() : m_isRValue(false) {
        /* TODO check flags */
    }

    inline Derived & operator=(const Derived& other) {
        this->operator=<Derived > (other);
        return derived();
    }

SkylineMatrixBase.h:

165
166
167
168
169
170
171
172
173
174
175
176
    }

    template<typename OtherDerived>
            inline Derived & operator=(const SkylineMatrixBase<OtherDerived>& other) {
        //TODO
    }

    template<typename Lhs, typename Rhs>
            inline Derived & operator=(const SkylineProduct<Lhs, Rhs, SkylineTimeSkylineProduct>& product);

    friend std::ostream & operator <<(std::ostream & s, const SkylineMatrixBase& m) {
        s << m.derived();

SkylineStorage.h:

194
195
196
197
198
199
200
201
202
203
204
205
        memset(m_lowerProfile, 0, m_diagSize * sizeof (Index));
    }

    void prune(Scalar reference, RealScalar epsilon = dummy_precision<RealScalar>()) {
        //TODO
    }

protected:

    inline void reallocate(Index diagSize, Index upperProfileSize, Index lowerProfileSize, Index upperSize, Index lowerSize) {

        Scalar* diag = new Scalar[diagSize];

BlockSparseMatrix.h:

508
509
510
511
512
513
514
515
516
517
518
519
      {
        //Browse each outer block

        //First, copy and save the indices of nonzero blocks
        //FIXME : find a way to avoid this ...
        std::vector<int> nzBlockIdx;
        typename MatrixType::InnerIterator it(blockPattern, bj);
        for(; it; ++it)
        {
          nzBlockIdx.push_back(it.index());
        }
        std::sort(nzBlockIdx.begin(), nzBlockIdx.end());

BlockSparseMatrix.h:

597
598
599
600
601
602
603
604
605
606
607
608
    {
      eigen_assert((m_innerBSize != 0 && m_outerBSize != 0) &&
          "TRYING TO RESERVE ZERO-SIZE MATRICES, CALL resize() first");

      //FIXME Should free if already allocated
      m_outerIndex = new StorageIndex[m_outerBSize+1];

      m_nonzerosblocks = nonzerosblocks;
      if(m_blockSize != Dynamic)
      {
        m_nonzeros = nonzerosblocks * (m_blockSize * m_blockSize);
        m_blockPtr = 0;

BlockSparseMatrix.h:

624
625
626
627
628
629
630
631
632
633
634
635
      * The InputIterator class should provide the functions row(), col() and value()
      *
      * \note For fixed-size blocks, call setBlockSize() before this function.
      *
      * FIXME Do not accept duplicates
      */
    template<typename InputIterator>
    void setFromTriplets(const InputIterator& begin, const InputIterator& end)
    {
      eigen_assert((m_innerBSize!=0 && m_outerBSize !=0) && "ZERO BLOCKS, PLEASE CALL resize() before");

      /* First, sort the triplet list

BlockSparseMatrix.h:

632
633
634
635
636
637
638
639
640
641
642
643
    {
      eigen_assert((m_innerBSize!=0 && m_outerBSize !=0) && "ZERO BLOCKS, PLEASE CALL resize() before");

      /* First, sort the triplet list
        * FIXME This can be unnecessarily expensive since only the inner indices have to be sorted
        * The best approach is like in SparseMatrix::setFromTriplets()
        */
      internal::TripletComp<InputIterator, IsColMajor> tripletcomp;
      std::sort(begin, end, tripletcomp);

      /* Count the number of rows and column blocks,
       * and the number of nonzero blocks per outer dimension
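
The FIXME refers to the strategy used by SparseMatrix::setFromTriplets(), which neither requires a pre-sorted triplet list nor rejects duplicates (it sums them). A small usage sketch of that reference behaviour:

// Sketch of the SparseMatrix::setFromTriplets() pattern the FIXME points to:
// triplets need not be pre-sorted, and duplicate entries are summed.
#include <Eigen/Sparse>
#include <iostream>
#include <vector>

int main() {
  std::vector<Eigen::Triplet<double>> trips;
  trips.emplace_back(2, 0, 3.0);
  trips.emplace_back(0, 1, 1.0);
  trips.emplace_back(0, 1, 0.5);  // duplicate entry, summed on insertion

  Eigen::SparseMatrix<double> A(3, 3);
  A.setFromTriplets(trips.begin(), trips.end());
  std::cout << A.coeff(0, 1) << std::endl;  // 1.5
}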

BlockSparseMatrix.h:

820
821
822
823
824
825
826
827
828
829
830
831
        return Map<BlockScalar>(&(m_values[blockPtr(offset)]), rsize, csize);
      }
      else
      {
        //FIXME the block does not exist, Insert it !!!!!!!!!
        eigen_assert("DYNAMIC INSERTION IS NOT YET SUPPORTED");
      }
    }

    /**
      * \returns the value of the (i,j) block as an Eigen Dense Matrix
      */

BlockSparseMatrix.h:

1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
    // block row index
    inline Index row() const  {return index(); }
    // block column index
    inline Index col() const {return outer(); }
    // FIXME Number of rows in the current block
    inline Index rows() const { return (m_mat.m_blockSize==Dynamic) ? (m_mat.m_innerOffset[index()+1] - m_mat.m_innerOffset[index()]) : m_mat.m_blockSize; }
    // Number of columns in the current block ...
    inline Index cols() const { return (m_mat.m_blockSize==Dynamic) ? (m_mat.m_outerOffset[m_outer+1]-m_mat.m_outerOffset[m_outer]) : m_mat.m_blockSize;}
    inline operator bool() const { return (m_id < m_end); }

  protected:
    const BlockSparseMatrix<_Scalar, _BlockAtCompileTime, _Options, StorageIndex>& m_mat;

DynamicSparseMatrix.h:

59
60
61
62
63
64
65
66
67
68
69
70
    typedef SparseMatrixBase<DynamicSparseMatrix> Base;
    using Base::convert_index;
  public:
    EIGEN_SPARSE_PUBLIC_INTERFACE(DynamicSparseMatrix)
    // FIXME: why are these operators already available???
    // EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(DynamicSparseMatrix, +=)
    // EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(DynamicSparseMatrix, -=)
    typedef MappedSparseMatrix<Scalar,Flags> Map;
    using Base::IsRowMajor;
    using Base::operator=;
    enum {
      Options = _Options

DynamicSparseMatrix.h:

213
214
215
216
217
218
219
220
221
222
223
224
      const Index innerSize = IsRowMajor ? cols : rows;
      if (m_innerSize>innerSize)
      {
        // remove all coefficients with innerCoord>=innerSize
        // TODO
        //std::cerr << "not implemented yet\n";
        exit(2);
      }
      if (m_data.size() != outerSize)
      {
        m_data.resize(outerSize);
      }

SpecialFunctionsImpl.h:

1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
    s += t1;
    s += ai;

    u = a * numext::log(x);
    // TODO: gamma() is not directly implemented in Eigen.
    /*
    if ((a + b) < maxgam && numext::abs(u) < maxlog) {
      t = gamma(a + b) / (gamma(a) * gamma(b));
      s = s * t * pow(x, a);
    } else {
    */
    t = lgamma_impl<double>::run(a + b) - lgamma_impl<double>::run(a) -
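
Because a direct gamma() is not available, the quoted code evaluates the Beta-function prefactor in log space with lgamma. A scalar sketch of that substitution, with made-up values of a, b, and x:

// Sketch: Gamma(a+b) / (Gamma(a) * Gamma(b)) evaluated in log space with
// lgamma, then recombined with x^a = exp(a * log x) at the end.
#include <cmath>
#include <iostream>

int main() {
  const double a = 3.5, b = 7.0, x = 0.25;
  const double log_prefactor =
      std::lgamma(a + b) - std::lgamma(a) - std::lgamma(b);
  const double term = std::exp(log_prefactor + a * std::log(x));
  std::cout << term << std::endl;
}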

SpecialFunctionsImpl.h:

1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
        x^a (1-x)^b Gamma(a+b) / ( a Gamma(a) Gamma(b) ).   */

    y = a * numext::log(x);
    t = b * numext::log(xc);
    // TODO: gamma is not directly implemented in Eigen.
    /*
    if ((a + b) < maxgam && numext::abs(y) < maxlog && numext::abs(t) < maxlog)
    {
      t = pow(xc, b);
      t *= pow(x, a);
      t /= a;
      t *= w;

Complex.h:

82
83
84
85
86
87
88
89
90
91
92
93
}

template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
{
  // FIXME The following might be optimized using _mm256_movedup_pd
  Packet2cf a = ploaddup<Packet2cf>(from);
  Packet2cf b = ploaddup<Packet2cf>(from+1);
  return  Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
}

template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }

MathFunctions.h:

113
114
115
116
117
118
119
120
121
122
123
124
// Natural logarithm
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C = log(2)
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
// be easily approximated by a polynomial centered on m=1 for stability.
// TODO(gonnet): Further reduce the interval allowing for lower-degree
//               polynomial interpolants -> ... -> profit!
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
plog<Packet8f>(const Packet8f& _x) {
  Packet8f x = _x;
  _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
  _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
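
The comment block describes the argument reduction behind the vectorized plog. A scalar sketch of the same reduction; the polynomial approximation of log(m) is replaced by std::log here, so the sketch only illustrates the splitting step:

// Scalar sketch of the reduction: x = m * 2^e with m shifted into
// [sqrt(1/2), sqrt(2)), then log(x) = e*log(2) + log(m).
#include <cmath>
#include <iostream>

int main() {
  const double x = 42.0;
  int e;
  double m = std::frexp(x, &e);          // x = m * 2^e with m in [0.5, 1)
  const double sqrt_half = std::sqrt(0.5);
  if (m < sqrt_half) { m *= 2.0; --e; }  // move m into [sqrt(1/2), sqrt(2))
  const double logx = e * std::log(2.0) + std::log(m);
  std::cout << logx << " vs " << std::log(x) << std::endl;
}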

MathFunctions.h:

245
246
247
248
249
250
251
252
253
254
255
256
#endif

  Packet8f r2 = pmul(r, r);

  // TODO(gonnet): Split into odd/even polynomials and try to exploit
  //               instruction-level parallelism.
  Packet8f y = p8f_cephes_exp_p0;
  y = pmadd(y, r, p8f_cephes_exp_p1);
  y = pmadd(y, r, p8f_cephes_exp_p2);
  y = pmadd(y, r, p8f_cephes_exp_p3);
  y = pmadd(y, r, p8f_cephes_exp_p4);
  y = pmadd(y, r, p8f_cephes_exp_p5);
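
The TODO suggests splitting the single Horner chain into odd and even parts so the two halves can be evaluated in parallel. A scalar sketch of that transformation with made-up coefficients (the real code keeps the coefficients in p8f_cephes_exp_p0..p5 and works on packets):

// Sketch: evaluate p(r) = p_even(r^2) + r * p_odd(r^2) as two shorter
// dependency chains instead of one serial Horner chain.
#include <iostream>

int main() {
  const double c[6] = {1.0, 0.5, 1.0 / 6, 1.0 / 24, 1.0 / 120, 1.0 / 720};
  const double r = 0.3, r2 = r * r;

  // Serial Horner, as in the quoted code.
  double horner = c[5];
  for (int i = 4; i >= 0; --i) horner = horner * r + c[i];

  // Odd/even split: the two halves have no dependency on each other.
  const double even = (c[4] * r2 + c[2]) * r2 + c[0];
  const double odd  = (c[5] * r2 + c[3]) * r2 + c[1];
  const double split = even + r * odd;

  std::cout << horner << " " << split << std::endl;  // agree up to rounding
}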

MathFunctions.h:

331
332
333
334
335
336
337
338
339
340
341
342
  qx = pmadd(qx, x2, p4d_cephes_exp_q2);
  qx = pmadd(qx, x2, p4d_cephes_exp_q3);

  // I don't really get this bit, copied from the SSE2 routines, so...
  // TODO(gonnet): Figure out what is going on here, perhaps find a better
  // rational interpolant?
  x = _mm256_div_pd(px, psub(qx, px));
  x = pmadd(p4d_2, x, p4d_1);

  // Build e=2^n by constructing the exponents in a 128-bit vector and
  // shifting them to where they belong in double-precision values.
  __m128i emm0 = _mm256_cvtpd_epi32(fx);

PacketMath.h:

220
221
222
223
224
225
226
227
228
229
230
// Loads 4 floats from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, a3}
template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
{
  // TODO try to find a way to avoid the need of a temporary register
//   Packet8f tmp  = _mm256_castps128_ps256(_mm_loadu_ps(from));
//   tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1);
//   return _mm256_unpacklo_ps(tmp,tmp);

  // _mm256_insertf128_ps is very slow on Haswell, thus:
  Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
  // mimic an "inplace" permutation of the lower 128bits using a blend

PacketMath.h:

353
354
355
356
357
358
359
360
361
362
363
  return _mm256_and_pd(a,mask);
}

// preduxp should be ok
// FIXME: why is this ok? why isn't the simple implementation working as expected?
template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
{
    __m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
    __m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
    __m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
    __m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);

MathFunctions.h:

159
160
161
162
163
164
165
166
167
168
169
170
  _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
  Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
  Packet16f r2 = pmul(r, r);

  // TODO(gonnet): Split into odd/even polynomials and try to exploit
  //               instruction-level parallelism.
  Packet16f y = p16f_cephes_exp_p0;
  y = pmadd(y, r, p16f_cephes_exp_p1);
  y = pmadd(y, r, p16f_cephes_exp_p2);
  y = pmadd(y, r, p16f_cephes_exp_p3);
  y = pmadd(y, r, p16f_cephes_exp_p4);
  y = pmadd(y, r, p16f_cephes_exp_p5);

MathFunctions.h:

233
234
235
236
237
238
239
240
241
242
243
244
  qx = pmadd(qx, x2, p8d_cephes_exp_q2);
  qx = pmadd(qx, x2, p8d_cephes_exp_q3);

  // I don't really get this bit, copied from the SSE2 routines, so...
  // TODO(gonnet): Figure out what is going on here, perhaps find a better
  // rational interpolant?
  x = _mm512_div_pd(px, psub(qx, px));
  x = pmadd(p8d_2, x, p8d_1);

  // Build e=2^n.
  const Packet8d e = _mm512_castsi512_pd(_mm512_slli_epi64(
      _mm512_add_epi64(_mm512_cvtpd_epi64(n), _mm512_set1_epi64(1023)), 52));

PacketMath.h:

81
82
83
84
85
86
87
88
89
90
91
92
    HasDiv = 1
  };
};

/* TODO Implement AVX512 for integers
template<> struct packet_traits<int>    : default_packet_traits
{
  typedef Packet16i type;
  enum {
    Vectorizable = 1,
    AlignedOnScalar = 1,
    size=8

Complex.h:

227
228
229
230
231
232
233
234
235
236
237
238
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)

template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for AltiVec
  Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b);
  Packet4f s = pmul<Packet4f>(b.v, b.v);
  return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
}

template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
{
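
All of the pdiv specializations quoted from these Complex.h variants implement the same identity. A scalar reference sketch of what the SIMD code computes:

// Scalar sketch of vectorized complex division:
// a / b = a * conj(b) / |b|^2, with |b|^2 built from b's squared real and
// imaginary parts (the shuffled/permuted sum in the SIMD versions).
#include <complex>
#include <iostream>

int main() {
  const std::complex<float> a(1.0f, 2.0f), b(3.0f, -4.0f);
  const float norm2 = b.real() * b.real() + b.imag() * b.imag();
  const std::complex<float> q = a * std::conj(b) / norm2;
  std::cout << q << " vs " << a / b << std::endl;  // should agree
}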

Complex.h:

362
363
364
365
366
367
368
369
370
371
372
373
struct palign_impl<Offset,Packet1cd>
{
  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
  {
    // FIXME: are we sure we never have to align a Packet1cd?
    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16-byte boundary...
  }
};

template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const

Complex.h:

404
405
406
407
408
409
410
411
412
413
414
415
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)

template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // TODO optimize it for AltiVec
  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
  Packet2d s = pmul<Packet2d>(b.v, b.v);
  return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
}

EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
{

Complex.h:

268
269
270
271
272
273
274
275
276
277
278
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)

template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for NEON
  Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
  Packet4f s, rev_s;

  // this computes the norm
  s = vmulq_f32(b.v, b.v);
  rev_s = vrev64q_f32(s);

Complex.h:

419
420
421
422
423
424
425
426
427
428
429
430
struct palign_impl<Offset,Packet1cd>
{
  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
  {
    // FIXME: are we sure we never have to align a Packet1cd?
    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16-byte boundary...
  }
};

template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const

Complex.h:

461
462
463
464
465
466
467
468
469
470
471
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)

template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // TODO optimize it for NEON
  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
  Packet2d s = pmul<Packet2d>(b.v, b.v);
  Packet2d rev_s = preverse<Packet2d>(s);

  return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
}

PacketMath.h:

108
109
110
111
112
113
114
115
116
117
118
119
    size = 4,
    HasHalfPacket=0, // Packet2f intrinsics not implemented yet

    HasDiv  = 1,
    // FIXME check the Has*
    HasSin  = 0,
    HasCos  = 0,
    HasLog  = 0,
    HasExp  = 1,
    HasSqrt = 0
  };
};

PacketMath.h:

125
126
127
128
129
130
131
132
133
134
135
136
    Vectorizable = 1,
    AlignedOnScalar = 1,
    size=4,
    HasHalfPacket=0 // Packet2i intrinsics not implemented yet
    // FIXME check the Has*
  };
};

#if EIGEN_GNUC_AT_MOST(4,4) && !EIGEN_COMP_LLVM
// workaround gcc 4.2, 4.3 and 4.4 compilation issue
EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }

PacketMath.h:

337
338
339
340
341
342
343
344
345
346
template<> EIGEN_STRONG_INLINE void prefetch<float>  (const float*    addr) { EIGEN_ARM_PREFETCH(addr); }
template<> EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t*  addr) { EIGEN_ARM_PREFETCH(addr); }

// FIXME only store the first 2 elements?
template<> EIGEN_STRONG_INLINE float   pfirst<Packet4f>(const Packet4f& a) { float   EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
template<> EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) { int32_t EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }

template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
  float32x2_t a_lo, a_hi;
  Packet4f a_r64;

PacketMath.h:

596
597
598
599
600
601
602
603
604
605
606
607
    size = 2,
    HasHalfPacket=0,

    HasDiv  = 1,
    // FIXME check the Has*
    HasSin  = 0,
    HasCos  = 0,
    HasLog  = 0,
    HasExp  = 0,
    HasSqrt = 0
  };
};

PacketMath.h:

685
686
687
688
689
690
691
692
693
694
695
696
  to[stride*1] = vgetq_lane_f64(from, 1);
}
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ARM_PREFETCH(addr); }

// FIXME only store the first 2 elements?
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a, 0); }

template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }

template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }

#if EIGEN_COMP_CLANG && defined(__apple_build_version__)

Complex.h:

232
233
234
235
236
237
238
239
240
241
242
243
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)

template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for SSE3 and 4
  Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
  __m128 s = _mm_mul_ps(b.v,b.v);
  return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
}

EIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x)
{

Complex.h:

309
310
311
312
313
314
315
316
317
318
319
template<> EIGEN_STRONG_INLINE Packet1cd por    <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pxor   <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }

// FIXME force unaligned load, this is a temporary fix
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>&  from)
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }

Complex.h:

319
320
321
322
323
324
325
326
327
328
329
330
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }

template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }

// FIXME force unaligned store, this is a temporary fix
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }

template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }

template<> EIGEN_STRONG_INLINE std::complex<double>  pfirst<Packet1cd>(const Packet1cd& a)
{

Complex.h:

354
355
356
357
358
359
360
361
362
363
364
365
struct palign_impl<Offset,Packet1cd>
{
  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
  {
    // FIXME: are we sure we never have to align a Packet1cd?
    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16-byte boundary...
  }
};

template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const

Complex.h:

417
418
419
420
421
422
423
424
425
426
427
428
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)

template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // TODO optimize it for SSE3 and 4
  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
  __m128d s = _mm_mul_pd(b.v,b.v);
  return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
}

EIGEN_STRONG_INLINE Packet1cd pcplxflip/* <Packet1cd> */(const Packet1cd& x)
{

PacketMath.h:

168
169
170
171
172
173
174
175
176
177
178
179
#endif

#if EIGEN_COMP_MSVC==1500
// Workaround MSVC 9 internal compiler error.
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)).
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps(from,from,from,from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set_epi32(from,from,from,from); }
#else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps1(from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }

PacketMath.h:

169
170
171
172
173
174
175
176
177
178
179
#if EIGEN_COMP_MSVC==1500
// Workaround MSVC 9 internal compiler error.
// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)).
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps(from,from,from,from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set_epi32(from,from,from,from); }
#else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps1(from); }
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set1_epi32(from); }

PacketMath.h:

607
608
609
610
611
612
613
614
615
616
617
618
template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
{
  // after some experiments, it seems this is the fastest way to implement it
  // for GCC (e.g., reusing pmul is very slow!)
  // TODO try to call _mm_mul_epu32 directly
  EIGEN_ALIGN16 int aux[4];
  pstore(aux, a);
  return (aux[0] * aux[1]) * (aux[2] * aux[3]);
}

// min
template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)

Complex.h:

252
253
254
255
256
257
258
259
260
261
262
263
struct palign_impl<Offset,Packet1cd>
{
  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
  {
    // FIXME: are we sure we never have to align a Packet1cd?
    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16-byte boundary...
  }
};

template<int Offset>
struct palign_impl<Offset,Packet2cf>
{

Complex.h:

340
341
342
343
344
345
346
347
348
349
350
351
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)

template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  // TODO optimize it for AltiVec
  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
  Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
  return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
}

template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{

Complex.h:

348
349
350
351
352
353
354
355
356
357
358
359
}

template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  // TODO optimize it for AltiVec
  Packet2cf res;
  res.cd[0] = pdiv<Packet1cd>(a.cd[0], b.cd[0]);
  res.cd[1] = pdiv<Packet1cd>(a.cd[1], b.cd[1]);
  return res;
}

EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)

PacketMath.h:

336
337
338
339
340
341
342
343
344
345
346
347
};

template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from)
{
  // FIXME: No intrinsic yet
  EIGEN_DEBUG_ALIGNED_LOAD
  Packet *vfrom;
  vfrom = (Packet *) from;
  return vfrom->v4i;
}

template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float*   from)

PacketMath.h:

345
346
347
348
349
350
351
352
353
354
355
}

template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float*   from)
{
  // FIXME: No intrinsic yet
  EIGEN_DEBUG_ALIGNED_LOAD
  Packet4f vfrom;
  vfrom.v4f[0] = vec_ld2f(&from[0]);
  vfrom.v4f[1] = vec_ld2f(&from[2]);
  return vfrom;
}

PacketMath.h:

355
356
357
358
359
360
361
362
363
364
365
366
}

template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
{
  // FIXME: No intrinsic yet
  EIGEN_DEBUG_ALIGNED_LOAD
  Packet *vfrom;
  vfrom = (Packet *) from;
  return vfrom->v2d;
}

template<> EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet4i& from)

PacketMath.h:

364
365
366
367
368
369
370
371
372
373
374
375
}

template<> EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet4i& from)
{
  // FIXME: No intrinsic yet
  EIGEN_DEBUG_ALIGNED_STORE
  Packet *vto;
  vto = (Packet *) to;
  vto->v4i = from;
}

template<> EIGEN_STRONG_INLINE void pstore<float>(float*   to, const Packet4f& from)

PacketMath.h:

373
374
375
376
377
378
379
380
381
382
383
384
}

template<> EIGEN_STRONG_INLINE void pstore<float>(float*   to, const Packet4f& from)
{
  // FIXME: No intrinsic yet
  EIGEN_DEBUG_ALIGNED_STORE
  vec_st2f(from.v4f[0], &to[0]);
  vec_st2f(from.v4f[1], &to[2]);
}


template<> EIGEN_STRONG_INLINE void pstore<double>(double*   to, const Packet2d& from)

PacketMath.h:

382
383
384
385
386
387
388
389
390
391
template<> EIGEN_STRONG_INLINE void pstore<double>(double*   to, const Packet2d& from)
{
  // FIXME: No intrinsic yet
  EIGEN_DEBUG_ALIGNED_STORE
  Packet *vto;
  vto = (Packet *) to;
  vto->v2d = from;
}

template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from)

TensorAssign.h:

113
114
115
116
117
118
119
120
121
122
123
124
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // The dimensions of the lhs and the rhs tensors should be equal to prevent
    // overflows and ensure the result is fully initialized.
    // TODO: use left impl instead if right impl dimensions are known at compile time.
    return m_rightImpl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    // If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non

TensorBroadcasting.h:

166
167
168
169
170
171
172
173
174
175
176
177
      return coeffRowMajor(index);
    }
  }

  // TODO: attempt to speed this up. The integer divisions and modulo are slow
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const
  {
    Index inputIndex = 0;
    for (int i = NumDims - 1; i > 0; --i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
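
coeffColMajor recovers per-dimension coordinates from a flat index with one division and one modulo per dimension, which is the cost the TODO flags. A standalone sketch of that index arithmetic with a made-up 4x3x2 shape:

// Sketch: decomposing a flat column-major index into coordinates using
// precomputed strides, one division (plus a subtraction standing in for the
// modulo) per dimension.
#include <array>
#include <iostream>

int main() {
  const std::array<int, 3> dims = {4, 3, 2};
  // Column-major strides: stride[0] = 1, stride[i] = stride[i-1] * dims[i-1].
  const std::array<int, 3> strides = {1, dims[0], dims[0] * dims[1]};

  int index = 17;  // flat index into the 4x3x2 tensor
  std::array<int, 3> coords;
  for (int i = 2; i > 0; --i) {
    coords[i] = index / strides[i];
    index -= coords[i] * strides[i];
  }
  coords[0] = index;
  std::cout << coords[0] << " " << coords[1] << " " << coords[2]
            << std::endl;  // 1 1 1
}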

TensorBroadcasting.h:

280
281
282
283
284
285
286
287
288
289
290
291
      }
    }
    inputIndex += innermostLoc;

    // Todo: this could be extended to the second dimension if we're not
    // broadcasting alongside the first dimension, and so on.
    if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      values[0] = m_impl.coeff(inputIndex);
      for (int i = 1; i < PacketSize; ++i) {

TensorBroadcasting.h:

332
333
334
335
336
337
338
339
340
341
342
343
      }
    }
    inputIndex += innermostLoc;

    // Todo: this could be extended to the second dimension if we're not
    // broadcasting alongside the first dimension, and so on.
    if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      values[0] = m_impl.coeff(inputIndex);
      for (int i = 1; i < PacketSize; ++i) {

TensorConcatenation.h:

175
176
177
178
179
180
181
182
183
184
185
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  // TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear?
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/)
  {
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    m_rightImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

TensorConcatenation.h:

189
190
191
192
193
194
195
196
197
198
199
200
    m_leftImpl.cleanup();
    m_rightImpl.cleanup();
  }

  // TODO(phli): attempt to speed this up. The integer divisions and modulo are slow.
  // See CL/76180724 comments for more ideas.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    // Collect dimension-wise indices (subs).
    array<Index, NumDims> subs;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {

TensorConcatenation.h:

243
244
245
246
247
248
249
250
251
252
253
      return m_rightImpl.coeff(right_index);
    }
  }

  // TODO(phli): Add a real vectorization.
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + packetSize - 1 < dimensions().TotalSize());

TensorContractionCuda.h:

429
430
431
432
433
434
435
436
437
438
439
  //     each do 8 writes into global memory. We can just overwrite the shared
  //     memory from the problem we just solved.
  // (2) is slightly faster than (1) due to less branching and more ILP

  // TODO: won't yield much gain, but could just use currently unused shared mem
  //       and then we won't have to sync
  // wait for shared mem to be out of use
  __syncthreads();

#define writeResultShmem(i, j)                                          \
  lhs_shmem[i + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j] = res(i, j); \

TensorContractionCuda.h:

465
466
467
468
469
470
471
472
473
474
475
476
  const int max_j_write = numext::mini((int)((n_size - base_n - threadIdx.z + 7) / 8), 8);

  if (threadIdx.x < max_i_write) {
    if (max_j_write == 8) {
      // TODO: can i trade bank conflicts for coalesced writes?
      Scalar val0 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 0];
      Scalar val1 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 1];
      Scalar val2 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 2];
      Scalar val3 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 3];
      Scalar val4 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 4];
      Scalar val5 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 5];
      Scalar val6 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 6];

TensorContractionMapper.h:

344
345
346
347
348
349
350
351
352
353
354
355
  typedef Self LinearMapper;

  enum {
    // We can use direct offsets iff the parent mapper supports them and we can compute the strides.
    // TODO: we should also enable direct offsets for the Rhs case.
    UseDirectOffsets = ParentMapper::DirectOffsets && (side == Lhs) && inner_dim_contiguous && (array_size<contract_t>::value > 0)
  };

  EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset)
      : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) {
    // Bake the offsets into the buffer used by the base mapper whenever possible. This avoids the need to recompute
    // this offset every time we attempt to access a coefficient.

TensorContractionThreadPool.h:

204
205
206
207
208
209
210
211
212
213
214
215
        contractionCost(m, n, bm, bn, bk, shard_by_col, false);
    int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
        static_cast<double>(n) * m, cost, this->m_device.numThreads());

    // TODO(dvyukov): this is a stop-gap to prevent regressions while the cost
    // model is not tuned. Remove this when the cost model is tuned.
    if (n == 1) num_threads = 1;

    if (num_threads == 1) {
      // The single-threaded algorithm should be faster in this case.
      if (n == 1)
        this->template evalGemv<lhs_inner_dim_contiguous,

TensorContractionThreadPool.h:

386
387
388
389
390
391
392
393
394
395
396
397
    void run() {
      // Kick off packing of the first slice.
      signal_switch(0, 1);
      // Wait for overall completion.
      // TODO(dvyukov): this wait can lead to deadlock.
      // If nthreads contractions are concurrently submitted from worker
      // threads, this wait will block all worker threads and the system will
      // deadlock.
      done_.Wait();
    }

   private:

TensorContractionThreadPool.h:

770
771
772
773
774
775
776
777
778
779
780
781
                                                   rhs_inner_dim_reordered, Unaligned> RhsMapper;

    typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;

    // TODO: packing could be faster sometimes if we supported row major tensor mappers
    typedef internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, Traits::mr,
                                    Traits::LhsProgress, ColMajor> LhsPacker;
    typedef internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> RhsPacker;

    // TODO: replace false, false with conjugate values?
    typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper,
                                  Traits::mr, Traits::nr, false, false> GebpKernel;

TensorContractionThreadPool.h:

775
776
777
778
779
780
781
782
783
784
785
786
    typedef internal::gemm_pack_lhs<LhsScalar, Index, typename LhsMapper::SubMapper, Traits::mr,
                                    Traits::LhsProgress, ColMajor> LhsPacker;
    typedef internal::gemm_pack_rhs<RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> RhsPacker;

    // TODO: replace false, false with conjugate values?
    typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper,
                                  Traits::mr, Traits::nr, false, false> GebpKernel;

    typedef internal::packLhsArg<LhsScalar, LhsMapper, Index> packLArg;
    typedef internal::packRhsAndKernelArg<LhsScalar, RhsScalar, RhsMapper, OutputMapper, Index> packRKArg;

    // initialize data mappers

TensorContractionThreadPool.h:

826
827
828
829
830
831
832
833
834
835
836
      blockAs.push_back(static_cast<LhsScalar *>(this->m_device.allocate(sizeA * sizeof(LhsScalar))));
    }

    // To circumvent alignment issues, I'm just going to separately allocate the memory for each thread
    // TODO: is this too much memory to allocate? This simplifies coding a lot, but is wasteful.
    //       Other options: (1) reuse memory when a thread finishes. con: tricky
    //                      (2) allocate block B memory in each thread. con: overhead
    MaxSizeVector<RhsScalar *> blockBs(n_blocks);
    for (int i = 0; i < n_blocks; i++) {
      blockBs.push_back(static_cast<RhsScalar *>(this->m_device.allocate(sizeB * sizeof(RhsScalar))));
    }

TensorConvolution.h:

1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    // TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost
    // model.
    const double kernel_size = m_kernelImpl.dimensions().TotalSize();
    // We ignore the use of fused multiply-add.
    const double convolve_compute_cost =
        TensorOpCost::AddCost<Scalar>() + TensorOpCost::MulCost<Scalar>();
    const double firstIndex_compute_cost =
        NumDims *

TensorCostModel.h:

23
24
25
26
27
28
29
30
31
32
33
34
// Class storing the cost of evaluating a tensor expression in terms of the
// estimated number of operand bytes loads, bytes stored, and compute cycles.
class TensorOpCost {
 public:
  // TODO(rmlarsen): Fix the scalar op costs in Eigen proper. Even a simple
  // model based on minimal reciprocal throughput numbers from Intel or
  // Agner Fog's tables would be better than what is there now.
  template <typename ArgType>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost() {
    return internal::functor_traits<
        internal::scalar_product_op<ArgType, ArgType> >::Cost;
  }

TensorCostModel.h:

 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
    bytes_loaded_ = 0;
    bytes_stored_ = 0;
  }

  // TODO(rmlarsen): Define min in terms of total cost, not elementwise.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin(
      const TensorOpCost& rhs) const {
    double bytes_loaded = numext::mini(bytes_loaded_, rhs.bytes_loaded());
    double bytes_stored = numext::mini(bytes_stored_, rhs.bytes_stored());
    double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles());
    return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles);
  }

TensorCostModel.h:

101
102
103
104
105
106
107
108
109
110
111
112
    double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles());
    return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles);
  }

  // TODO(rmlarsen): Define max in terms of total cost, not elementwise.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax(
      const TensorOpCost& rhs) const {
    double bytes_loaded = numext::maxi(bytes_loaded_, rhs.bytes_loaded());
    double bytes_stored = numext::maxi(bytes_stored_, rhs.bytes_stored());
    double compute_cycles = numext::maxi(compute_cycles_, rhs.compute_cycles());
    return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles);
  }

TensorCostModel.h:

153
154
155
156
157
158
159
160
161
162
163
164
  double bytes_stored_;
  double compute_cycles_;
};

// TODO(rmlarsen): Implement a policy that chooses an "optimal" number of threads
// in [1:max_threads] instead of just switching multi-threading off for small
// work units.
template <typename Device>
class TensorCostModel {
 public:
  // Scaling from Eigen compute cost to device cycles.
  static const int kDeviceCyclesPerComputeCycle = 1;
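
The TODO asks for a policy that picks a thread count in [1, max_threads] from the estimated cost rather than an all-or-nothing switch. A hedged sketch of such a policy; the cycles-per-thread threshold is an invented tuning constant, not a value from Eigen:

// Sketch: scale the thread count with the estimated total cost and clamp it
// to [1, max_threads].
#include <algorithm>
#include <iostream>

int chooseNumThreads(double total_cost_cycles, int max_threads) {
  const double kMinCyclesPerThread = 100000.0;  // hypothetical tuning knob
  const int wanted =
      static_cast<int>(total_cost_cycles / kMinCyclesPerThread);
  return std::max(1, std::min(max_threads, wanted));
}

int main() {
  std::cout << chooseNumThreads(5.0e4, 8) << " "    // small job -> 1 thread
            << chooseNumThreads(1.0e7, 8) << std::endl;  // large job -> 8
}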

TensorCustomOp.h:

133
134
135
136
137
138
139
140
141
142
143
144
    return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    // TODO(rmlarsen): Extend CustomOp API to return its cost estimate.
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; }

 protected:
  EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {

TensorCustomOp.h:

288
289
290
291
292
293
294
295
296
297
298
299
    return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    // TODO(rmlarsen): Extend CustomOp API to return its cost estimate.
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; }

 protected:
  EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {

TensorDevice.h:

20
21
22
23
24
25
26
27
28
29
30
31
  *
  * Example:
  *    C.device(EIGEN_GPU) = A + B;
  *
  * Todo: operator *= and /=.
  */

template <typename ExpressionType, typename DeviceType> class TensorDevice {
  public:
    TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}

    template<typename OtherDerived>

TensorDeviceCuda.h:

188
189
190
191
192
193
194
195
196
197
198
199
  }
  explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) {
    eigen_assert(stream);
  }
  // TODO(bsteiner): This is an internal API, we should not expose it.
  EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
    return stream_->stream();
  }

  EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
    return stream_->allocate(num_bytes);
  }

TensorDeviceCuda.h:

245
246
247
248
249
250
251
252
253
254
255
256
#endif
  }

  EIGEN_STRONG_INLINE size_t numThreads() const {
    // FIXME
    return 32;
  }

  EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
    // FIXME
    return 48*1024;
  }

TensorDeviceCuda.h:

250
251
252
253
254
255
256
257
258
259
260
261
    return 32;
  }

  EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
    // FIXME
    return 48*1024;
  }

  EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
    // We won't try to take advantage of the l2 cache for the time being, and
    // there is no l3 cache on cuda devices.
    return firstLevelCacheSize();

TensorDeviceCuda.h:

318
319
320
321
322
323
324
325
326
327
328
329
  (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__);   \
  assert(cudaGetLastError() == cudaSuccess);


// FIXME: Should be device and kernel specific.
#ifdef __CUDACC__
static EIGEN_DEVICE_FUNC inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
#ifndef __CUDA_ARCH__
  cudaError_t status = cudaDeviceSetSharedMemConfig(config);
  EIGEN_UNUSED_VARIABLE(status)
  assert(status == cudaSuccess);
#else

TensorDimensions.h:

104
105
106
107
108
109
110
111
112
113
114
  EIGEN_DEVICE_FUNC Sizes() { }
  template <typename DenseIndex>
  explicit EIGEN_DEVICE_FUNC Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
    // todo: add assertion
  }
#if EIGEN_HAS_VARIADIC_TEMPLATES
  template <typename... DenseIndex> EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { }
  explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) {
    // todo: add assertion
  }
#endif

TensorDimensions.h:

109
110
111
112
113
114
115
116
117
118
119
120
  }
#if EIGEN_HAS_VARIADIC_TEMPLATES
  template <typename... DenseIndex> EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { }
  explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list<std::ptrdiff_t> /*l*/) {
    // todo: add assertion
  }
#endif

  template <typename T> Sizes& operator = (const T& /*other*/) {
    // add assertion failure if the size of other is different
    return *this;
  }

TensorDimensions.h:

166
167
168
169
170
171
172
173
174
175
176
  Sizes() { }
  template <typename DenseIndex>
  explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
    // todo: add assertion
  }
  template <typename T> Sizes& operator = (const T& /*other*/) {
    // add assertion failure if the size of other is different
    return *this;
  }

#if EIGEN_HAS_VARIADIC_TEMPLATES

TensorDimensions.h:

176
177
178
179
180
181
182
183
184
185
186
#if EIGEN_HAS_VARIADIC_TEMPLATES
  template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { }
  explicit Sizes(std::initializer_list<std::size_t>) {
    // todo: add assertion
  }
#else
  EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) {
  }
  EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex) {
  }
  EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex) {

TensorEvaluator.h:

18
19
20
21
22
23
24
25
26
27
28
29
  * \brief The tensor evaluator classes.
  *
  * These classes are responsible for the evaluation of the tensor expression.
  *
  * TODO: add support for more types of expressions, in particular expressions
  * leading to lvalues (slicing, reshaping, etc...)
  */

// Generic evaluator
template<typename Derived, typename Device>
struct TensorEvaluator
{

TensorEvaluator.h:

388
389
390
391
392
393
394
395
396
397
398
399
  typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use right impl instead if right impl dimensions are known at compile time.
    return m_leftImpl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    m_rightImpl.evalSubExprsIfNeeded(NULL);
    return true;

TensorEvaluator.h:

483
484
485
486
487
488
489
490
491
492
493
494
  typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use arg2 or arg3 dimensions if they are known at compile time.
    return m_arg1Impl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_arg1Impl.evalSubExprsIfNeeded(NULL);
    m_arg2Impl.evalSubExprsIfNeeded(NULL);
    m_arg3Impl.evalSubExprsIfNeeded(NULL);

TensorEvaluator.h:

573
574
575
576
577
578
579
580
581
582
583
584
  typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use then or else impl instead if they happen to be known at compile time.
    return m_condImpl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_condImpl.evalSubExprsIfNeeded(NULL);
    m_thenImpl.evalSubExprsIfNeeded(NULL);
    m_elseImpl.evalSubExprsIfNeeded(NULL);

TensorExpr.h:

80
81
82
83
84
85
86
87
88
89
90
91
template<typename UnaryOp, typename XprType>
struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> >
    : traits<XprType>
{
  // TODO(phli): Add InputScalar, InputPacket.  Check references to
  // current Scalar/Packet to see if the intent is Input or Output.
  typedef typename result_of<UnaryOp(typename XprType::Scalar)>::type Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprType::Nested XprTypeNested;
  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;

TensorExpr.h:

110
111
112
113
114
115
116
117
118
119
120
121
template<typename UnaryOp, typename XprType>
class TensorCwiseUnaryOp : public TensorBase<TensorCwiseUnaryOp<UnaryOp, XprType>, ReadOnlyAccessors>
{
  public:
    // TODO(phli): Add InputScalar, InputPacket.  Check references to
    // current Scalar/Packet to see if the intent is Input or Output.
    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef Scalar CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorCwiseUnaryOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Index Index;

TensorExpr.h:

142
143
144
145
146
147
148
149
150
151
152
153
struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
{
  // Type promotion to handle the case where the types of the lhs and the rhs
  // are different.
  // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket.  Check references to
  // current Scalar/Packet to see if the intent is Inputs or Output.
  typedef typename result_of<
      BinaryOp(typename LhsXprType::Scalar,
               typename RhsXprType::Scalar)>::type Scalar;
  typedef traits<LhsXprType> XprTraits;
  typedef typename promote_storage_type<
      typename traits<LhsXprType>::StorageKind,

TensorExpr.h:

186
187
188
189
190
191
192
193
194
195
196
197
template<typename BinaryOp, typename LhsXprType, typename RhsXprType>
class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType>, ReadOnlyAccessors>
{
  public:
    // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket.  Check references to
    // current Scalar/Packet to see if the intent is Inputs or Output.
    typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef Scalar CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorCwiseBinaryOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Index Index;

TensorFFT.h:

20
21
22
23
24
25
26
27
28
29
30
31
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor FFT class.
  *
  * TODO:
  * Vectorize the Cooley Tukey and the Bluestein algorithm
  * Add support for multithreaded evaluation
  * Improve the performance on GPU
  */

template <bool NeedUprade> struct MakeComplex {
  template <typename T>

TensorFixedSize.h:

335
336
337
338
339
340
341
342
343
344
345
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorFixedSize& operator=(const TensorFixedSize& other)
    {
      // FIXME: check that the dimensions of other match the dimensions of *this.
      // Unfortunately this isn't possible yet when the rhs is an expression.
      typedef TensorAssignOp<Self, const TensorFixedSize> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }
    template<typename OtherDerived>

TensorFixedSize.h:

346
347
348
349
350
351
352
353
354
355
356
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorFixedSize& operator=(const OtherDerived& other)
    {
      // FIXME: check that the dimensions of other match the dimensions of *this.
      // Unfortunately this isn't possible yet when the rhs is an expression.
      typedef TensorAssignOp<Self, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

TensorGenerator.h:

146
147
148
149
150
151
152
153
154
155
156
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool) const {
    // TODO(rmlarsen): This is just a placeholder. Define interface to make
    // generators return their cost.
    return TensorOpCost(0, 0, TensorOpCost::AddCost<Scalar>() +
                                  TensorOpCost::MulCost<Scalar>());
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }

TensorInflation.h:

183
184
185
186
187
188
189
190
191
192
193
     return Scalar(0);
    }
  }

  // TODO(yangke): optimize this function so that we can detect and produce
  // all-zero packets
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

TensorMorphing.h:

280
281
282
283
284
285
286
287
288
289
290
291
    const Sizes m_sizes;
};


// Fixme: figure out the exact threshold
namespace {
template <typename Index, typename Device> struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; }

 private:
  Index threshold_;

TensorRandom.h:

55
56
57
58
59
60
61
62
63
64
65
#endif
}

static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) {
  // TODO: Unify with the implementation in the non blocking thread pool.
  uint64_t current = *state;
  // Update the internal state
  *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
  // Generate the random output (using the PCG-XSH-RS scheme)
  return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
}

TensorReduction.h:

605
606
607
608
609
610
611
612
613
614
615
616
      return reducer.finalize(accum);
    }
  }

  // TODO(bsteiner): provide a more efficient implementation.
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions())));

    if (RunningOnGPU && m_result) {

TensorReverse.h:

197
198
199
200
201
202
203
204
205
206
207
208
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    // TODO(ndjaitly): write a better packing routine that uses
    // local structure.
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
                                                            values[PacketSize];
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);

TensorScan.h:

207
208
209
210
211
212
213
214
215
216
217
218
  CoeffReturnType* m_output;
};

// CPU implementation of scan
// TODO(ibab) This single-threaded implementation should be parallelized,
// at least by running multiple scans at the same time.
template <typename Self, typename Reducer, typename Device>
struct ScanLauncher {
  void operator()(Self& self, typename Self::CoeffReturnType *data) {
    Index total_size = internal::array_prod(self.dimensions());

    // We fix the index along the scan axis to 0 and perform a

TensorScan.h:

243
244
245
246
247
248
249
250
251
252
253
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)

// GPU implementation of scan
// TODO(ibab) This placeholder implementation performs multiple scans in
// parallel, but it would be better to use a parallel scan algorithm and
// optimize memory access.
template <typename Self, typename Reducer>
__global__ void ScanKernel(Self self, Index total_size, typename Self::CoeffReturnType* data) {
  // Compute offset as in the CPU version
  Index val = threadIdx.x + blockIdx.x * blockDim.x;
  Index offset = (val / self.stride()) * self.stride() * self.size() + val % self.stride();

TensorTraits.h:

165
166
167
168
169
170
171
172
173
174
175
176
{
  typedef const TensorRef<PlainObjectType>& type;
};

// TODO nested<> does not exist anymore in Eigen/Core, and it thus has to be removed in favor of ref_selector.
template<typename T, int n=1, typename PlainObject = void> struct nested
{
  typedef typename ref_selector<T>::type type;
};

template <typename Scalar_, int NumIndices_, int Options_, typename IndexType_>
struct nested<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >

NonBlockingThreadPool.h:

144
145
146
147
148
149
150
151
152
153
154
155
      if (!t.f) {
        t = Steal();
        if (!t.f) {
          // Leave one thread spinning. This reduces latency.
          // TODO(dvyukov): 1000 iterations is based on fair dice roll, tune it.
          // Also, the time it takes to attempt to steal work 1000 times depends
          // on the size of the thread pool. However the speed at which the user
          // of the thread pool submit tasks is independent of the size of the
          // pool. Consider a time based limit instead.
          if (!spinning_ && !spinning_.exchange(true)) {
            for (int i = 0; i < 1000 && !t.f; i++) {
              t = Steal();

EmulateArray.h:

177
178
179
180
181
182
183
184
185
186
187
188
  T dummy;
};

// Comparison operator
// Todo: implement !=, <, <=, >,  and >=
template<class T, std::size_t N>
EIGEN_DEVICE_FUNC bool operator==(const array<T,N>& lhs, const array<T,N>& rhs) {
  for (std::size_t i = 0; i < N; ++i) {
    if (lhs[i] != rhs[i]) {
      return false;
    }
  }
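
The Todo lists the remaining comparison operators. A sketch of the simplest one, operator!=, written against a stand-in type (array_like is hypothetical, not Eigen's array):

// Sketch: operator!= expressed in terms of the existing operator==.
#include <cstddef>
#include <iostream>

template <class T, std::size_t N> struct array_like { T values[N]; };

template <class T, std::size_t N>
bool operator==(const array_like<T, N>& lhs, const array_like<T, N>& rhs) {
  for (std::size_t i = 0; i < N; ++i)
    if (lhs.values[i] != rhs.values[i]) return false;
  return true;
}

template <class T, std::size_t N>
bool operator!=(const array_like<T, N>& lhs, const array_like<T, N>& rhs) {
  return !(lhs == rhs);  // reuse the equality operator already provided
}

int main() {
  array_like<int, 2> a = {{1, 2}};
  array_like<int, 2> b = {{1, 3}};
  std::cout << (a != b) << std::endl;  // prints 1
}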

test_exceptions_greedy.py:

39
40
41
42
43
44
45
46
47
48
49
    # Bad `m`
    x = map[1, 3]
    assert len(x) == 0
    # TODO: It would be nice if `map[1, 3] = ...` raised
    # an error, but currently it does nothing (silently).

    # Bad type
    with pytest.raises(ValueError) as e:
        x = map[2.3, 5.7]
    with pytest.raises(ValueError) as e:
        map[2.3, 5.7] = 1

test_light_delay_greedy.py:

 9
10
11
12
13
    sec = starry.Secondary(starry.Map(), porb=1.0)
    sys = starry.System(pri, sec, light_delay=True)
    assert sys.light_delay is True

    # TODO: Add tests here.

test_system_greedy.py:

83
84
85
86
87
    sys = starry.System(pri, sec)
    t = np.concatenate((np.linspace(0.1, 0.4, 50), np.linspace(0.6, 0.9, 50)))
    flux = sys.flux(t)

    # TODO: Add an analytic validation here