/*********************************************************************/
/* File:   h1hofe.cpp                                                */
/* Author: Start                                                      */
/* Date:   6. Feb. 2003                                              */
/*********************************************************************/

 
#include <fem.hpp>
namespace ngfem
{
  using namespace ngfem;
  
  H1HighOrderFiniteElement ::   
  H1HighOrderFiniteElement (int dim, ELEMENT_TYPE aeltype)
    : NodalFiniteElement (dim, aeltype, -1, -1) 
  { 
    for (int i = 0; i < 8; i++)
      vnums[i] = i;
    
    augmented = 0;
  }
   
  void H1HighOrderFiniteElement::
  SetVertexNumbers (FlatArray<int> & avnums)
  {
    for (int i = 0; i < avnums.Size(); i++)
      vnums[i] = avnums[i];
  }
    
  
  void H1HighOrderFiniteElement::
  SetOrderInner (int oi)
  {
    order_inner = oi;
    ComputeNDof();
  }

  void H1HighOrderFiniteElement::
  SetAugmented (int aa)
  {
    augmented = aa;
    ComputeNDof();
  }

  
  void H1HighOrderFiniteElement::
  SetOrderFace (FlatArray<int> & of)
  {
    for (int i = 0; i < of.Size(); i++)
      order_face[i] = of[i];
    ComputeNDof();
  }

  
  void H1HighOrderFiniteElement::
  SetOrderEdge (FlatArray<int> & oe)
  {
    for (int i = 0; i < oe.Size(); i++)
      order_edge[i] = oe[i];
    ComputeNDof();
  }





  /* *********************** Segment  **********************/


  template <class T_ORTHOPOL>
  H1HighOrderSegm<T_ORTHOPOL> :: H1HighOrderSegm (int aorder)
    : H1HighOrderFiniteElement(1, ET_SEGM)
  {
    order_inner = aorder;
    ComputeNDof();
  }


  template <class T_ORTHOPOL>
  void H1HighOrderSegm<T_ORTHOPOL> :: ComputeNDof()
  {
    ndof = 2;
    if (augmented == 1) ndof += 2;
    if (augmented == 2) ndof = 2 * order_inner;

    ndof += order_inner-1;
    order = order_inner;
  }

  template <class T_ORTHOPOL> 
  void H1HighOrderSegm<T_ORTHOPOL> ::CalcShape( const IntegrationPoint & ip,
						FlatVector<> shape) const
  {
    double x = ip(0);
    double lami[2] = { x, 1-x };
 
    shape(0) = lami[0];
    shape(1) = lami[1];

    int ii = 2;
    
    // vertex shapes
    if (augmented == 1)
      for (int i = 0; i < 2; i++)
	shape(ii++) = T_VERTEXSHAPES::Calc (order, lami[i]);
    else
      if (augmented == 2)
	for (int i = 0; i < 2; i++)
	  {
	    ArrayMem<double,20> pol(order+1);
	    LegendrePolynomial (order-1, -1+2*lami[i], pol);	
	    for (int j = 1; j < order; j++)
	      shape(ii++) = pol[j] * lami[i];
	  }
    
    int ee=1, es=0;
    if (vnums[es] > vnums[ee]) swap(es,ee);
    double * pedge = &shape(ii);
    T_ORTHOPOL::Calc (order_inner, lami[ee]-lami[es], pedge);
  }
 
  template <class T_ORTHOPOL>
  void H1HighOrderSegm<T_ORTHOPOL> :: CalcDShape (const IntegrationPoint & ip, 
						  FlatMatrix<> dshape) const
  {
    AutoDiff<1> x(ip(0),0); 
    AutoDiff<1> lami[2] = {x,1-x}; 
    dshape(0,0) = lami[0].DValue(0);
    dshape(1,0) = lami[1].DValue(0);
    
    int ee=1,es=0;
    if (vnums[es] > vnums[ee]) swap(es,ee);
    
    int ii=2; 
    ArrayMem<AutoDiff<1>,10> pedge(order_inner);  
    T_ORTHOPOL::Calc (order_inner, lami[ee]-lami[es], pedge);
    for(int i=0;i<order_inner-1;i++)
      dshape(ii++,0)=pedge[i].DValue(0); 
  }



  /* *********************** Triangle  **********************/

  template <class T_ORTHOPOL>
  H1HighOrderTrig<T_ORTHOPOL> :: H1HighOrderTrig(int aorder)
    : H1HighOrderFiniteElement(2, ET_TRIG)
  {
    order_inner = aorder;
    for (int i = 0; i < 3; i++)
      order_edge[i] = aorder;

    ComputeNDof();
  }


  template <class T_ORTHOPOL>
  void H1HighOrderTrig<T_ORTHOPOL> :: ComputeNDof()
  {
    ndof = 3;
    if (augmented == 1) ndof += 3;
    if (augmented == 2) ndof = 3 * order_inner;

    for (int i = 0; i < 3; i++)
      ndof += order_edge[i] - 1;
    ndof += (order_inner-1)*(order_inner-2) / 2;

    order = 1;
    for (int i = 0; i < 3; i++)
      if (order_edge[i] > order)
	order = order_edge[i];
    if (order_inner > order)
      order = order_inner;
  }



  template <class T_ORTHOPOL>
  void H1HighOrderTrig<T_ORTHOPOL> ::  
  GetInternalDofs (ARRAY<int> & idofs) const
  {
    idofs.SetSize (0);

    int base = 3;
    if (augmented == 1) base *= 2;
    if (augmented == 2) base *= order_inner;

    int i;
    for (i = 0; i < 3; i++)
      base += order_edge[i] - 1;

    if(order_inner > 2)
      {
	int ni = (order_inner-1)*(order_inner-2) / 2;
	for (i = 0; i < ni; i++)
	  idofs.Append (base+i);
      }
  }



  template <class T_ORTHOPOL>
  void H1HighOrderTrig<T_ORTHOPOL> :: CalcShape (const IntegrationPoint & ip, 
						 FlatVector<> shape) const
  {
    T_CalcShape<double, double, FlatVector<> > (ip(0), ip(1), shape); 

    /*
      int i, j, ii, p;
      double x = ip(0), y = ip(1);
      double lami[3] = { x,y, 1-x-y };

      for (i = 0; i < 3; i++)
      shape(i) = lami[i];

      ii = 3;

      // vertex shapes
      if (augmented == 1)
      for (i = 0; i < 3; i++)
      shape(ii++) = T_VERTEXSHAPES::Calc (order, lami[i]);
      else
      if (augmented == 2)
      for (i = 0; i < 3; i++)
      {
      ArrayMem<double,20> pol(order+1);
      LegendrePolynomial (order-1, -1+2*lami[i], pol);	
      for (j = 1; j < order; j++)
      shape(ii++) = pol[j] * lami[i];
      }
    
      // edge dofs
      const EDGE * edges = ElementTopology::GetEdges (ET_TRIG);
      for (i = 0; i < 3; i++)
      { 
      p = order_edge[i]; 
      int es = edges[i][0];
      int ee = edges[i][1];
      if (vnums[es] > vnums[ee]) swap (es, ee);
	
      double * hp = &shape(ii);
      ii += T_ORTHOPOL::CalcTrigExt (p, 
      lami[ee]-lami[es], 1-lami[es]-lami[ee], 
      hp);
      }

      int fav[3] = { 0, 1, 2 }; 
      if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
      if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
      if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	

      double * hp = &shape(ii);
      ii += T_INNERSHAPES::Calc (order_inner, lami[fav[2]]-lami[fav[1]],
      lami[fav[0]], hp);
    */
  }

  template <class T_ORTHOPOL>
  void H1HighOrderTrig<T_ORTHOPOL> :: CalcDShape (const IntegrationPoint & ip, 
						  FlatMatrix<> dshape) const
  {
    AutoDiff<2> x(ip(0), 0);
    AutoDiff<2> y(ip(1), 1);

    ArrayMem<AutoDiff<2>,40> sds(ndof);
    T_CalcShape<AutoDiff<2>, AutoDiff<2>, FlatArray<AutoDiff<2> > >
      (x,y,sds);
    for (int i = 0; i < ndof; i++)
      for (int j = 0; j < 2; j++)
	dshape(i, j) = sds[i].DValue(j);


    /*
      ArrayMem<AutoDiff<2>,40> sds(ndof);
      CalcShapeDShape (ip, sds);
      for (int i = 0; i < ndof; i++)
      for (int j = 0; j < 2; j++)
      dshape(i, j) = sds[i].DValue(j);
    */
  }



  template<typename T_ORTHOPOL> 
  template<typename Tx, typename Ty, typename TFA>  
  void  H1HighOrderTrig<T_ORTHOPOL> :: T_CalcShape (Tx x, Ty y, TFA & sds) const
  {
    Tx lami[3] = { x, y, 1-x-y };

    for (int i = 0; i < 3; i++)
      sds[i] = lami[i];

    int ii = 3;

    // vertex shapes
    if (augmented == 1)
      for (int i = 0; i < 3; i++)
	sds[ii++] = T_VERTEXSHAPES::Calc (order, lami[i]);
    else
      if (augmented == 2)
	for (int i = 0; i < 3; i++)
	  {
	    ArrayMem<Tx,20> pol(order+1);
	    LegendrePolynomial (order-1, -1+2*lami[i], pol);	
	    for (int j = 1; j < order; j++)
	      sds[ii++] = pol[j] * lami[i];
	  }
    
    // edge dofs
    const EDGE * edges = ElementTopology::GetEdges (ET_TRIG);
    for (int i = 0; i < 3; i++)
      if (order_edge[i] >= 2)
	{ 
	  int es = edges[i][0], ee = edges[i][1];
	  if (vnums[es] > vnums[ee]) swap (es, ee);
	  
	  Tx * hp = &sds[ii];
	  ii += T_ORTHOPOL::CalcTrigExt (order_edge[i], 
					 lami[ee]-lami[es], 1-lami[es]-lami[ee], hp);
	}
    
    int fav[3] = { 0, 1, 2 }; 
    if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
    if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
    if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	

    if (order_inner >= 3)
      {
	Tx * hp = &sds[ii];
	ii += T_INNERSHAPES::Calc (order_inner, lami[fav[2]]-lami[fav[1]],
				   lami[fav[0]], hp);  
      }
  }



  /*
    template <class T_ORTHOPOL>
    void H1HighOrderTrig<T_ORTHOPOL> :: CalcShapeDShape (const IntegrationPoint & ip, 
    ARRAY<AutoDiff<2> > & sds) const
    {
    int i, j, ii, p;
    AutoDiff<2> x(ip(0), 0);
    AutoDiff<2> y(ip(1), 1);
    
    AutoDiff<2> lami[3] = { x,y, 1-x-y };

    for (i = 0; i < 3; i++)
    sds[i] = lami[i];

    ii = 3;

    // vertex shapes
    if (augmented == 1)
    for (i = 0; i < 3; i++)
    sds[ii++] = T_VERTEXSHAPES::Calc (order, lami[i]);
    else
    if (augmented == 2)
    for (i = 0; i < 3; i++)
    {
    ArrayMem<AutoDiff<2>,20> pol(order+1);
    LegendrePolynomial (order-1, -1+2*lami[i], pol);	
    for (j = 1; j < order; j++)
    sds[ii++] = pol[j] * lami[i];
    }
    
    // edge dofs
    const EDGE * edges = ElementTopology::GetEdges (ET_TRIG);
    for (i = 0; i < 3; i++)
    { 
    p = order_edge[i]; 
    int es = edges[i][0];
    int ee = edges[i][1];
    if (vnums[es] > vnums[ee]) swap (es, ee);
	
    AutoDiff<2> * hp = &sds[ii];
    ii += T_ORTHOPOL::CalcTrigExt (p, 
    lami[ee]-lami[es], 1-lami[es]-lami[ee], 
    hp);
    // (AutoDiff<2> *const)&sds[ii]);	
    }

    int fav[3] = { 0, 1, 2 }; 
    if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
    if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
    if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	

    AutoDiff<2> * hp = &sds[ii];
    ii += T_INNERSHAPES::Calc (order_inner, lami[fav[2]]-lami[fav[1]],
    lami[fav[0]], hp);
    }
  */


  /*  template <class T_ORTHOPOL>
      void H1HighOrderTrig<T_ORTHOPOL> :: CalcDShape (const IntegrationPoint & ip, 
      FlatMatrix<> dshape) const
      {
      AutoDiff<2> x(ip(0),0);
      AutoDiff<2> y(ip(1),1);
      int i, j, k, ii;

      double lami[3] = { ip(0), ip(1), 1-ip(0)-ip(1) };
      double dlami[3][2] = { { 1, 0 }, { 0, 1 }, { -1, -1 } };

      AutoDiff<2> ad_lami[3];
      ad_lami[0] = x;
      ad_lami[1] = y;
      ad_lami[2] = 1-x-y;

      // vertex shapes
      for (i = 0; i < 3; i++)
      {
      dshape(i,0) = ad_lami[i].DValue(0);
      dshape(i,1) = ad_lami[i].DValue(1);
      }

      ii = 3;

      ArrayMem<AutoDiff<2>,20> pol(order+1), pol2(order+1), shapei(order_inner * order_inner);
      MatrixFixWidth<2> dedgeshape(order+1);

      if (augmented == 1)
      for (i = 0; i < 3; i++)
      {
      AutoDiff<2> ho = T_VERTEXSHAPES::Calc (order, ad_lami[i]);
      dshape(ii,0) = ho.DValue(0);
      dshape(ii,1) = ho.DValue(1);
      ii++;
      }
      else
      if (augmented == 2)
      for (i = 0; i < 3; i++)
      {
      LegendrePolynomial (order-1, -1+2*ad_lami[i], pol);	
      for (j = 0; j < order; j++)
      pol[j] *= ad_lami[i];

      for (j = 1; j < order; j++)
      {
      dshape(ii,0) = pol[j].DValue(0);
      dshape(ii,1) = pol[j].DValue(1);
      ii++;
      }
      }


      // edge dofs
      const EDGE * edges = ElementTopology::GetEdges (ET_TRIG);
      for (i = 0; i < 3; i++)
      {
      // edge start, edge end
      int es = edges[i][0];
      int ee = edges[i][1];
      if (vnums[es] > vnums[ee]) swap (es, ee);

      FlatMatrixFixWidth<2> mat_edge(order_edge[i]-1, &dshape(ii,0));
      ii += T_ORTHOPOL::CalcTrigExtDeriv 
      (order_edge[i], lami[ee]-lami[es], 1-lami[es]-lami[ee], mat_edge);	

      for (j = 0; j < order_edge[i]-1; j++)
      {
      double gradxi = mat_edge(j,0);
      double gradeta = mat_edge(j,1);
      for (k = 0; k < 2; k++)
      mat_edge(j,k) = 
      (dlami[ee][k]-dlami[es][k]) * gradxi - 
      (dlami[ee][k]+dlami[es][k]) * gradeta;
      }
      }

      int fav[3] = { 0, 1, 2 }; 
      if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
      if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
      if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	

      int ni = T_INNERSHAPES::Calc (order_inner, ad_lami[fav[2]]-ad_lami[fav[0]],
      ad_lami[fav[1]], shapei);
      for (i = 0; i < ni; i++)
      {
      dshape(ii,0) = shapei[i].DValue(0);
      dshape(ii,1) = shapei[i].DValue(1);
      ii++;
      }
    
      } */




  /* *********************** Quadrilateral  **********************/


  template <class T_ORTHOPOL>
  H1HighOrderQuad<T_ORTHOPOL> :: H1HighOrderQuad (int aorder)
    : H1HighOrderFiniteElement(2, ET_QUAD)
  {
    order_inner = aorder;
    for (int i = 0; i < 4; i++)
      order_edge[i] = aorder;
    ComputeNDof();
  }


  template <class T_ORTHOPOL>
  void H1HighOrderQuad<T_ORTHOPOL> :: ComputeNDof()
  {
    order = 1;
    for (int i = 0; i < 4; i++)
      if (order_edge[i] > order)
	order = order_edge[i];
    if (order_inner > order)
      order = order_inner;

    ndof = 4;
    if (augmented == 1) ndof *= 2;
    if (augmented == 2) ndof *= order;

    for (int i = 0; i < 4; i++)
      ndof += order_edge[i] - 1;
    
    ndof += (order_inner-1)*(order_inner-1);
  }



  
  template <class T_ORTHOPOL>
  void H1HighOrderQuad<T_ORTHOPOL> :: CalcShape (const IntegrationPoint & ip, 
						 FlatVector<> shape) const
  {  
    T_CalcShape<double, double, FlatVector<> > (ip(0), ip(1), shape); 

    /*
      ArrayMem<AutoDiff<2>, 40> sds(ndof);
      CalcShapeDShape (ip, sds);
      for (int i = 0; i < ndof; i++)
      shape(i) = sds[i].Value();
    */
  }

  template <class T_ORTHOPOL>
  void H1HighOrderQuad<T_ORTHOPOL> :: CalcDShape (const IntegrationPoint & ip, 
						  FlatMatrix<> dshape) const
  {
    AutoDiff<2> x(ip(0), 0);
    AutoDiff<2> y(ip(1), 1);

    ArrayMem<AutoDiff<2>,40> sds(ndof);
    T_CalcShape<AutoDiff<2>, AutoDiff<2>, FlatArray<AutoDiff<2> > >
      (x,y,sds);
    for (int i = 0; i < ndof; i++)
      for (int j = 0; j < 2; j++)
	dshape(i, j) = sds[i].DValue(j);
    /*
      ArrayMem<AutoDiff<2>,25> sds(ndof);
      CalcShapeDShape (ip, sds);
      for (int i = 0; i < ndof; i++)
      for (int j = 0; j < 2; j++)
      dshape(i, j) = sds[i].DValue(j);
    */
  }

  template<typename T_ORTHOPOL> 
  template<typename Tx, typename Ty, typename TFA>  
  void  H1HighOrderQuad<T_ORTHOPOL> :: T_CalcShape (Tx x, Ty y, TFA & sds) const
  {
    Tx lami[4] = {(1-x)*(1-y),x*(1-y),x*y,(1-x)*y};  
    Tx sigma[4] = {(1-x)+(1-y),x+(1-y),x+y,(1-x)+y};  
    
    // vertex shapes
    for(int i=0; i < 4; i++) sds[i] = lami[i]; 
    int ii = 4;

    if (augmented == 1)
      for (int i = 0; i < 4; i++)
	sds[ii++] = T_VERTEXSHAPES::Calc (order, sds[i]);
    else
      if (augmented == 2)
	for (int i = 0; i < 4; i++)
	  {
	    ArrayMem<Tx,20> pol(order+1);
	    LegendrePolynomial (order-1, -1+2*sds[i], pol);	
	    for (int j = 1; j < order; j++)
	      sds[ii++] = pol[j] * sds[i];
	  }

    ArrayMem<Tx,20> polxi(order+1), poleta(order+1);
     
    // edge dofs
    const EDGE * edges = ElementTopology::GetEdges (ET_QUAD);
    for (int i = 0; i < 4; i++)
      {
	int p = order_edge[i];
	int es = edges[i][0];
	int ee = edges[i][1];
	if (vnums[es] > vnums[ee]) swap (es, ee);

	Tx xi  = sigma[ee]-sigma[es]; // in [-1,1] 
	Tx eta = lami[ee]+lami[es];  // attention in [0,1]

	T_ORTHOPOL::Calc (p, xi, polxi);
	
	for (int j = 0; j <= p-2; j++)
	  sds[ii++] = eta * polxi[j];
      }    
    
    if (order_inner >= 2)
      {
	int p = order_inner;
	int fmax = 0; 
	for (int j = 1; j < 4; j++)
	  if (vnums[j] > vnums[fmax]) fmax = j;
	
	int f1 = (fmax+3)%4; 
	int f2 = (fmax+1)%4; 
	if(vnums[f2] > vnums[f1]) swap(f1,f2);  // fmax > f1 > f2; 
	
	Tx xi = sigma[fmax]-sigma[f1]; 
	Tx eta = sigma[fmax]-sigma[f2]; 
	
	T_ORTHOPOL::Calc(p+1, xi,polxi);
	T_ORTHOPOL::Calc(p+1,eta,poleta);
	
	for (int k = 0; k <= p-2; k++)
	  for (int j = 0; j <= p-2; j++)
	    sds[ii++] = polxi[k] * poleta[j];
      }
   
  }






  /*
    template <class T_ORTHOPOL>
    void H1HighOrderQuad<T_ORTHOPOL> :: CalcShapeDShape (const IntegrationPoint & ip, 
    ARRAY<AutoDiff<2> > & sds) const
    {
    //    augmented = 0; 

    AutoDiff<2> x (ip(0), 0);
    AutoDiff<2> y (ip(1), 1);
    int i, j, k, ii, p;
    
    AutoDiff<2> lami[4] = {(1-x)*(1-y),x*(1-y),x*y,(1-x)*y};  
    AutoDiff<2> sigma[4] = {(1-x)+(1-y),x+(1-y),x+y,(1-x)+y};  
    
    // vertex shapes
    for(i=0;i<4;i++) sds[i] = lami[i]; 
    ii = 4;

    if (augmented == 1)
    for (i = 0; i < 4; i++)
    sds[ii++] = T_VERTEXSHAPES::Calc (order, sds[i]);
    else
    if (augmented == 2)
    for (i = 0; i < 4; i++)
    {
    ArrayMem<AutoDiff<2>,20> pol(order+1);
    LegendrePolynomial (order-1, -1+2*sds[i], pol);	
    for (j = 1; j < order; j++)
    sds[ii++] = pol[j] * sds[i];
    }

    ArrayMem<AutoDiff<2>,20> polxi(order+1), poleta(order+1);
     
    // edge dofs
    const EDGE * edges = ElementTopology::GetEdges (ET_QUAD);
    for (i = 0; i < 4; i++)
    {
    p = order_edge[i];
    int es = edges[i][0];
    int ee = edges[i][1];
    if (vnums[es] > vnums[ee]) swap (es, ee);

    AutoDiff<2> xi  = sigma[ee]-sigma[es]; // in [-1,1] 
    AutoDiff<2> eta = lami[ee]+lami[es];  // attention in [0,1]

    T_ORTHOPOL::Calc (p, xi, polxi);
	
    for (j = 0; j <= p-2; j++)
    sds[ii++] = eta * polxi[j];
    }    
    
    if (order_inner >= 2)
    {
    p = order_inner;
    int fmax = 0; 
    for (j = 1; j < 4; j++)
    if (vnums[j] > vnums[fmax])
    fmax = j;
	
    int f1 = (fmax+3)%4; 
    int f2 = (fmax+1)%4; 
    if(vnums[f2] > vnums[f1]) swap(f1,f2);  // fmax > f1 > f2; 
	
    AutoDiff<2> xi = sigma[fmax]-sigma[f1]; 
    AutoDiff<2> eta = sigma[fmax]-sigma[f2]; 
	
    T_ORTHOPOL::Calc(p+1, xi,polxi);
    T_ORTHOPOL::Calc(p+1,eta,poleta);
	
    for (k = 0; k <= p-2; k++)
    for (j = 0; j <= p-2; j++)
    sds[ii++] = polxi[k] * poleta[j];
    }
   
    }
  */  





  /* *********************** Tetrahedron  **********************/

  template <class T_ORTHOPOL>
  H1HighOrderTet<T_ORTHOPOL> :: H1HighOrderTet (int aorder)
    : H1HighOrderFiniteElement(3, ET_TET)
  {
    order_inner = aorder;
    int i;
    for (i = 0; i < 6; i++)
      order_edge[i] = aorder;
    for (i = 0; i < 4; i++)
      order_face[i] = aorder;

    ComputeNDof();
  }


  template <class T_ORTHOPOL>
  void H1HighOrderTet<T_ORTHOPOL> :: ComputeNDof()
  {
    ndof = 4;
    if (augmented == 1) ndof *= 2;
    if (augmented == 2) ndof *= order_inner;

    for (int i = 0; i < 6; i++)
      ndof += order_edge[i] - 1;
    for (int i = 0; i < 4; i++)
      ndof += (order_face[i]-1)*(order_face[i]-2) / 2;
    ndof += (order_inner-1)*(order_inner-2)*(order_inner-3) / 6;

    order = 1;
    for (int i = 0; i < 6; i++)
      if (order_edge[i] > order)
	order = order_edge[i];
    for (int i = 0; i < 4; i++)
      if (order_face[i] > order)
	order = order_face[i];
    if (order_inner > order)
      order = order_inner;
  }


  template <class T_ORTHOPOL>
  void H1HighOrderTet<T_ORTHOPOL> ::  
  GetInternalDofs (ARRAY<int> & idofs) const
  {
    idofs.SetSize (0);

    int base = 4;
    if (augmented == 1) base *= 2;
    if (augmented == 2) base *= order_inner;

    for (int i = 0; i < 6; i++)
      base += order_edge[i] - 1;
    for (int i = 0; i < 4; i++)
      base += (order_face[i]-1)*(order_face[i]-2) / 2;

    if(order_inner > 2)
      {
	int ni = (order_inner-1)*(order_inner-2)*(order_inner-3) / 6;
	for (int i = 0; i < ni; i++)
	  idofs.Append (base+i);
      }
  }

  
  template <class T_ORTHOPOL>
  void H1HighOrderTet<T_ORTHOPOL> :: CalcShape (const IntegrationPoint & ip, 
						FlatVector<> shape) const
  {
    T_CalcShape<double, double, double, FlatVector<> > (ip(0), ip(1), ip(2), shape); 
    
    /*
      CalcShapeDShape (ip, sds);
      for (int i = 0; i < ndof; i++)
      shape(i) = sds[i].Value();
    */
  }
  
  template <class T_ORTHOPOL>
  void H1HighOrderTet<T_ORTHOPOL> :: CalcDShape (const IntegrationPoint & ip, 
						 FlatMatrix<> dshape) const
  {
    AutoDiff<3> x(ip(0), 0);
    AutoDiff<3> y(ip(1), 1);
    AutoDiff<3> z(ip(2), 2);

    ArrayMem<AutoDiff<3>,40> sds(ndof);
    T_CalcShape<AutoDiff<3>, AutoDiff<3>, AutoDiff<3>, FlatArray<AutoDiff<3> > >
      (x,y,z,sds);
    for (int i = 0; i < ndof; i++)
      for (int j = 0; j < 3; j++)
	dshape(i, j) = sds[i].DValue(j);
  }
     
  template<typename T_ORTHOPOL> 
  template<typename Tx, typename Ty, typename Tz, typename TFA>  
  void  H1HighOrderTet<T_ORTHOPOL> :: T_CalcShape (Tx x, Ty y, Tz z, TFA & sds) const
  {
    Tx lami[4] = { x, y, z, 1-x-y-z };

    ArrayMem<Tx, 20> polx(order+1), poly(order+1), polz(order+1); 

    // vertex shapes
    for (int i = 0; i < 4; i++)
      sds[i] = lami[i];
    int ii = 4; 

    if (augmented == 1)
      for (int i = 0; i < 4; i++)
	sds[ii++] = T_VERTEXSHAPES::Calc (order, lami[i]);
    else
      if (augmented == 2)
	for (int i = 0; i < 4; i++)
	  {
	    ArrayMem<Tx,20> pol(order+1);
	    LegendrePolynomial (order-1, -1+2*lami[i], pol);	
	    for (int j = 1; j < order; j++)
	      sds[ii++] = pol[j] * lami[i];
	  }
 
    // edge dofs
    const EDGE * edges = ElementTopology::GetEdges (ET_TET);
    for (int i = 0; i < 6; i++)
      if (order_edge[i] >= 2)
	{
	  int es = edges[i][0], ee = edges[i][1];
	  if (vnums[es] > vnums[ee]) swap (es, ee);
	  
	  Tx * hp = &sds[ii];
	  ii += T_ORTHOPOL::CalcTrigExt (order_edge[i], lami[ee]-lami[es], 
					 1-lami[es]-lami[ee], hp);
	}

    // face dofs
    const FACE * faces = ElementTopology::GetFaces (ET_TET);
    for (int i = 0; i < 4; i++)
      if (order_face[i] >= 3)
	{
	  int fav[3] = { faces[i][0], faces[i][1], faces[i][2] };
	  
	  if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
	  if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
	  if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	
	  
	  int vop = 6 - fav[0] - fav[1] - fav[2];  	
	  
	  Tx * hp = &sds[ii];
	  ii += T_FACESHAPES::Calc (order_face[i], 
				    lami[fav[2]]-lami[fav[1]],
				    lami[fav[0]], lami[vop],  hp);
	}

    Tx * psds = &sds[ii];
    ii += T_INNERSHAPES::Calc (order_inner, x-(1-x-y-z), y, z, psds);
  }

  /*  
      template <class T_ORTHOPOL>
      void H1HighOrderTet<T_ORTHOPOL> :: CalcShapeDShape (const IntegrationPoint & ip, 
      ARRAY<AutoDiff<3> > & sds) const
      {
      int i, j, k, ii, p;

      AutoDiff<3> x(ip(0), 0);
      AutoDiff<3> y(ip(1), 1);
      AutoDiff<3> z(ip(2), 2);

      AutoDiff<3> lami[4] = { x, y, z, 1-x-y-z };

      ArrayMem<AutoDiff<3>,20> polx(order+1), poly(order+1), polz(order+1); 

      // vertex shapes
      for (i = 0; i < 4; i++)
      sds[i] = lami[i];
      ii = 4; 

      if (augmented == 1)
      for (i = 0; i < 4; i++)
      sds[ii++] = T_VERTEXSHAPES::Calc (order, lami[i]);
      else
      if (augmented == 2)
      for (i = 0; i < 4; i++)
      {
      ArrayMem<AutoDiff<3>,20> pol(order+1);
      LegendrePolynomial (order-1, -1+2*lami[i], pol);	
      for (j = 1; j < order; j++)
      sds[ii++] = pol[j] * lami[i];
      }

      // edge dofs
      const EDGE * edges = ElementTopology::GetEdges (ET_TET);
      for (i = 0; i < 6; i++)
      {
      p = order_edge[i];
      int es = edges[i][0];
      int ee = edges[i][1];
      if (vnums[es] > vnums[ee]) swap (es, ee);

      AutoDiff<3> * hp = &sds[ii];
      ii += T_ORTHOPOL::CalcTrigExt (p, lami[ee]-lami[es], 
      1-lami[es]-lami[ee], hp);
      }

       
      // face dofs
      const FACE * faces = ElementTopology::GetFaces (ET_TET);
      for (i = 0; i < 4; i++)
      {
      int fav[3] = { faces[i][0], faces[i][1], faces[i][2] };

      if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
      if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
      if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	

      int vop = 6 - fav[0] - fav[1] - fav[2];  	
      
      AutoDiff<3> * hp = &sds[ii];
      ii += T_FACESHAPES::Calc (order_face[i], 
      lami[fav[2]]-lami[fav[1]],
      lami[fav[0]], lami[vop],  hp);
      }

      AutoDiff<3> * psds = &sds[ii];
      ii += T_INNERSHAPES::Calc (order_inner, x-(1-x-y-z), y, z, psds);
      }
  */


  /* *********************** Prism  **********************/


  template <class T_ORTHOPOL>
  H1HighOrderPrism<T_ORTHOPOL> :: H1HighOrderPrism (int aorder)
    : H1HighOrderFiniteElement(3, ET_PRISM)
  {
    order_inner = aorder;
    for (int i = 0; i < 9; i++)
      order_edge[i] = aorder;
    for (int i = 0; i < 5; i++)
      order_face[i] = aorder;

    ComputeNDof();
  }


  template <class T_ORTHOPOL>  
  void H1HighOrderPrism<T_ORTHOPOL> :: ComputeNDof()
  {
    ndof = 6;
    if (augmented == 1) ndof *= 2;
    if (augmented == 2) ndof *= order_inner;

    for (int i = 0; i < 9; i++)
      ndof += order_edge[i] - 1;
    for (int i = 0; i < 2; i++)
      ndof += (order_face[i]-1)*(order_face[i]-2) / 2;
    for (int i = 2; i < 5; i++)
      ndof += (order_face[i]-1)*(order_face[i]-1);
    
    ndof += (order_inner-1)*(order_inner-2)*(order_inner-1) / 2;

    order = 1;
    for (int i = 0; i < 9; i++)
      if (order_edge[i] > order)
	order = order_edge[i];
    for (int i = 0; i < 5; i++)
      if (order_face[i] > order)
	order = order_face[i];
    if (order_inner > order)
      order = order_inner;
  }
  

  template <class T_ORTHOPOL>
  void H1HighOrderPrism<T_ORTHOPOL> ::  
  GetInternalDofs (ARRAY<int> & idofs) const
  {
    idofs.SetSize (0);

    int base = 6;
    if (augmented == 1) base *= 2;
    if (augmented == 2) base *= order_inner;

    for (int i = 0; i < 9; i++)
      base += order_edge[i] - 1;
    for (int i = 0; i < 2; i++)
      base += (order_face[i]-1)*(order_face[i]-2) / 2;
    for (int i = 2; i < 5; i++)
      base += (order_face[i]-1)*(order_face[i]-1);

    if(order_inner > 2)
      {
	int ni = (order_inner-1)*(order_inner-2)*(order_inner-1) / 2;
	for (int i = 0; i < ni; i++)
	  idofs.Append (base+i);
      }
  }



  template <class T_ORTHOPOL>
  void H1HighOrderPrism<T_ORTHOPOL> :: CalcShape (const IntegrationPoint & ip, 
						  FlatVector<> shape) const
  {
    T_CalcShape<double, double, double, FlatVector<> > (ip(0), ip(1), ip(2), shape); 
    /*
      ArrayMem<AutoDiff<3>, 40> sds(ndof);
      CalcShapeDShape (ip, sds);
      for (int i = 0; i < ndof; i++)
      shape(i) = sds[i].Value();
    */
  }

  template <class T_ORTHOPOL>
  void H1HighOrderPrism<T_ORTHOPOL> :: CalcDShape (const IntegrationPoint & ip, 
						   FlatMatrix<> dshape) const
  {
    AutoDiff<3> x(ip(0), 0);
    AutoDiff<3> y(ip(1), 1);
    AutoDiff<3> z(ip(2), 2);

    ArrayMem<AutoDiff<3>,40> sds(ndof);
    T_CalcShape<AutoDiff<3>, AutoDiff<3>, AutoDiff<3>, FlatArray<AutoDiff<3> > >
      (x,y,z,sds);
    for (int i = 0; i < ndof; i++)
      for (int j = 0; j < 3; j++)
	dshape(i, j) = sds[i].DValue(j);
    /*
      ArrayMem<AutoDiff<3>,40> sds(ndof);
      CalcShapeDShape (ip, sds);
      for (int i = 0; i < ndof; i++)
      for (int j = 0; j < 3; j++)
      dshape(i, j) = sds[i].DValue(j);
    */
  }


  template<typename T_ORTHOPOL> 
  template<typename Tx, typename Ty, typename Tz, typename TFA>  
  void  H1HighOrderPrism<T_ORTHOPOL> :: T_CalcShape (Tx x, Ty y, Tz z, TFA & sds) const
  {
    Tx lami[6] = { x, y, 1-x-y, x, y, 1-x-y };
    Tx muz[6]  = { 1-z, 1-z, 1-z, z, z, z };
    
    // vertex shapes
    for (int i = 0; i < 6; i++)
      sds[i] = lami[i] * muz[i];

    int ii = 6;

    if (augmented == 1)
      for (int i = 0; i < 6; i++)
	sds[ii++] = T_VERTEXSHAPES::Calc (order, sds[i]);
    else
      if (augmented == 2)
	for (int i = 0; i < 6; i++)
	  {
	    ArrayMem<Tx,20> pol(order+1);
	    LegendrePolynomial (order-1, -1+2*sds[i], pol);	
	    for (int j = 1; j < order; j++)
	      sds[ii++] = pol[j] * sds[i];
	  }

    // horizontal edge dofs
    const EDGE * edges = ElementTopology::GetEdges (ET_PRISM);
    for (int i = 0; i < 6; i++)
      if (order_edge[i] >= 2)
	{
	  int p = order_edge[i];
	  int es = edges[i][0], ee = edges[i][1];
	  if (vnums[es] > vnums[ee]) swap (es, ee);

	  Tx xi = lami[ee]-lami[es]; 
	  Tx eta = lami[es]+lami[ee]; 
	  
	  Tx * hp = &sds[ii];
	  T_ORTHOPOL::CalcTrigExt (p, xi, 1-eta, hp);
	  
	  for (int k = 0; k < p-1; k++)
	    sds[ii++] *= muz[ee];  
	}
    
    // vertical edges
    for (int i = 6; i < 9; i++)
      if (order_edge[i] >= 2)
	{
	  int p = order_edge[i];
	  int es = edges[i][0], ee = edges[i][1];
	  if (vnums[es] > vnums[ee]) swap (es, ee);
	  
	  Tx * hp = &sds[ii];
	  T_ORTHOPOL::Calc (p, muz[ee]-muz[es], hp);
	  
	  for (int j = 0; j < p-1; j++)
	    sds[ii++] *= lami[ee];
	}
    

    ArrayMem<Tx,20> polx(order+1) /* , poly(order+1) */ , polz(order+1);


    const FACE * faces = ElementTopology::GetFaces (ET_PRISM);
    // trig face dofs
    for (int i = 0; i < 2; i++)
      if (order_face[i] >= 3)
	{
	  int fav[3] = { faces[i][0], faces[i][1], faces[i][2] };
	  if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
	  if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
	  if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	
	  
	  Tx * pface = &sds[ii];
	  int ndf = T_TRIGFACESHAPES::Calc 
	    (order_face[i], lami[fav[2]]-lami[fav[1]], lami[fav[0]], pface);
	  
	  for (int j = 0; j < ndf; j++)
	    pface[j] *= muz[fav[2]];
	  ii += ndf;
	}
   
    // quad face dofs
    for (int i = 2; i < 5; i++)
      if (order_face[i] >= 2)
	{
	  int p = order_face[i];
	  
	  int fmax = 0;
	  for(int j = 1; j < 4; j++) 
	    if(vnums[faces[i][j]] > vnums[faces[i][fmax]]) fmax = j;
	  
	  int fz = 3-fmax; 
	  int ftrig = fmax^1; 
	  
	  Tx xi = lami[faces[i][fmax]] - lami[faces[i][ftrig]]; 
	  Tx eta = 1-lami[faces[i][fmax]]-lami[faces[i][ftrig]]; 
	  Tx zeta = muz[faces[i][fmax]]-muz[faces[i][fz]]; 
	  
	  T_ORTHOPOL::CalcTrigExt (p, xi, eta, polx);  
	  T_ORTHOPOL::Calc (p, zeta, polz);	
	  
	  if (vnums[faces[i][ftrig]] > vnums[faces[i][fz]])  
	    for (int k = 0; k < p-1; k++)
	      for (int j = 0; j < p-1; j++)
		sds[ii++] = polx[k] * polz[j];
	  else
	    for (int j = 0; j < p-1; j++)
	      for (int k = 0; k < p-1; k++)
		sds[ii++] = polx[k] * polz[j];
	}
    
    // volume dofs:
    if (order_inner >= 3)
      {
	int p = order_inner;
	ArrayMem<Tx,20> pol_trig((p-1)*(p-2)/2);
	int nf = T_TRIGFACESHAPES::Calc (order_inner, x-y, 1-x-y, pol_trig);

	T_ORTHOPOL:: Calc(order_inner, 2*z-1, polz);
	for (int i = 0; i < nf; i++)
	  for (int k = 0; k < p-1; k++)
	    sds[ii++] = pol_trig[i] * polz[k];
      }
    
  }




  /*
    template <class T_ORTHOPOL>
    void H1HighOrderPrism<T_ORTHOPOL> :: CalcShapeDShape (const IntegrationPoint & ip, 
    ARRAY<AutoDiff<3> > & sds) const
    {
    AutoDiff<3> x (ip(0), 0);
    AutoDiff<3> y (ip(1), 1);
    AutoDiff<3> z (ip(2), 2);
    int i, j, k, ii, p;
    
    AutoDiff<3> lami[6] = { x, y, 1-x-y, x, y, 1-x-y };
    AutoDiff<3> muz[6]  = { 1-z, 1-z, 1-z, z, z, z };
    
    // vertex shapes
    ii = 0;
    for (i = 0; i < 6; i++)
    sds[ii++] = lami[i] * muz[i];

    if (augmented == 1)
    for (i = 0; i < 6; i++)
    sds[ii++] = T_VERTEXSHAPES::Calc (order, sds[i]);
    else
    if (augmented == 2)
    for (i = 0; i < 6; i++)
    {
    ArrayMem<AutoDiff<3>,20> pol(order+1);
    LegendrePolynomial (order-1, -1+2*sds[i], pol);	
    for (j = 1; j < order; j++)
    sds[ii++] = pol[j] * sds[i];
    }

        
    //horizontal edge dofs new 
    const EDGE * edges = ElementTopology::GetEdges (ET_PRISM);
    for (i = 0; i < 6; i++)
    {
    p = order_edge[i];
    int es = edges[i][0];
    int ee = edges[i][1];
    if (vnums[es] > vnums[ee]) swap (es, ee);
    AutoDiff<3> xi = lami[ee]-lami[es]; 
    AutoDiff<3> eta = lami[es]+lami[ee]; 

    AutoDiff<3> * hp = &sds[ii];
    T_ORTHOPOL::CalcTrigExt (p, xi, 1-eta, hp);

    for (k = 0; k < p-1; k++)
    sds[ii++] *= muz[ee];  
    }
    
    // vertical edges
    for (i = 6; i < 9; i++)
    {
    p = order_edge[i];
    int es = edges[i][0];
    int ee = edges[i][1];
    if (vnums[es] > vnums[ee]) swap (es, ee);

    AutoDiff<3> * hp = &sds[ii];
    T_ORTHOPOL::Calc (p, muz[ee]-muz[es], hp);

    for (j = 0; j < p-1; j++)
    sds[ii++] *= lami[ee];
    }
    

    ArrayMem<AutoDiff<3>,20> polx(order+1), poly(order+1), polz(order+1);


    const FACE * faces = ElementTopology::GetFaces (ET_PRISM);
    // trig face dofs
    for (i = 0; i < 2; i++)
    {
    p = order_face[i];

    int fav[3] = { faces[i][0], faces[i][1], faces[i][2] };
    if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
    if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
    if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	
	
    AutoDiff<3> * pface = &sds[ii];
    int ndf = T_TRIGFACESHAPES::Calc 
    (p, lami[fav[2]]-lami[fav[1]], lami[fav[0]], pface);
	
    for (j = 0; j < ndf; j++)
    pface[j] *= muz[fav[2]];
    ii += ndf;
    }
   
   
    // quad face dofs
    for (i = 2; i < 5; i++)
    {
    p = order_face[i];

    int fmax = 0;
    for(j = 1; j < 4; j++) 
    if(vnums[faces[i][j]] > vnums[faces[i][fmax]]) fmax = j;
	
    int fz = 3-fmax; 
    int ftrig = fmax^1; 
	
    AutoDiff<3> xi = lami[faces[i][fmax]] - lami[faces[i][ftrig]]; 
    AutoDiff<3> eta = 1-lami[faces[i][fmax]]-lami[faces[i][ftrig]]; 
    AutoDiff<3> zeta = muz[faces[i][fmax]]-muz[faces[i][fz]]; 
	     
    T_ORTHOPOL::CalcTrigExt (p, xi, eta, polx);  
    T_ORTHOPOL::Calc (p, zeta, polz);	
	
    if (vnums[faces[i][ftrig]] > vnums[faces[i][fz]]) //SZ 
    for (k = 0; k < p-1; k++)
    for (j = 0; j < p-1; j++)
    sds[ii++] = polx[k] * polz[j];
    else
    for (j = 0; j < p-1; j++)
    for (k = 0; k < p-1; k++)
    sds[ii++] = polx[k] * polz[j];
    }
   
    // volume dofs:
    p = order_inner;
    if (p >= 3)
    {
    ArrayMem<AutoDiff<3>,20> pol_trig((p-1)*(p-2)/2);
    int nf = T_TRIGFACESHAPES::Calc (p, x-y, 1-x-y, pol_trig);

    T_ORTHOPOL:: Calc(p, 2*z-1, polz);
    for (i = 0; i < nf; i++)
    for (k = 0; k < p-1; k++)
    sds[ii++] = pol_trig[i] * polz[k];
    }
    
    }
  */


  /* *********************** Hex  **********************/

  template <class T_ORTHOPOL>
  H1HighOrderHex<T_ORTHOPOL> :: H1HighOrderHex (int aorder)
    : H1HighOrderFiniteElement(3, ET_HEX)
  {
    order_inner = aorder;
    for (int i = 0; i < 12; i++)
      order_edge[i] = aorder;
    for (int i = 0; i < 6; i++)
      order_face[i] = aorder;
    ComputeNDof();
  }

  template <class T_ORTHOPOL>
  void H1HighOrderHex<T_ORTHOPOL> :: ComputeNDof()
  {
    ndof = 8;

    if (augmented == 1) ndof *= 2;
    if (augmented == 2) ndof *= order_inner;

    for (int i = 0; i < 12; i++)
      ndof += order_edge[i] - 1;
    for (int i = 0; i < 6; i++)
      ndof +=  (order_face[i]-1)*(order_face[i]-1);
    ndof += (order_inner-1)*(order_inner-1)*(order_inner-1);

    order = 1;
    for (int i = 0; i < 12; i++)
      if (order_edge[i] > order)
	order = order_edge[i];
    for (int i = 0; i < 6; i++)
      if (order_face[i] > order)
	order = order_face[i];
    if (order_inner > order)
      order = order_inner;
  }

  template <class T_ORTHOPOL>
  void H1HighOrderHex<T_ORTHOPOL> ::  
  GetInternalDofs (ARRAY<int> & idofs) const
  {
    idofs.SetSize (0);

    int base = 8;
    if (augmented == 1) base *= 2;
    if (augmented == 2) base *= order_inner;

    for (int i = 0; i < 12; i++)
      base += order_edge[i] - 1;
    for (int i = 0; i < 6; i++)
      base += (order_face[i]-1)*(order_face[i]-1);

    if(order_inner >= 2)
      {
	int ni = (order_inner-1)*(order_inner-1)*(order_inner-1);
	for (int i = 0; i < ni; i++)
	  idofs.Append (base+i);
      }
  }



  
  template <class T_ORTHOPOL>
  void H1HighOrderHex<T_ORTHOPOL> :: CalcShape (const IntegrationPoint & ip, 
						FlatVector<> shape) const
  {
    T_CalcShape<double, double, double, FlatVector<> > (ip(0), ip(1), ip(2), shape); 
    /*
      ArrayMem<AutoDiff<3>, 40> sds(ndof);
      CalcShapeDShape (ip, sds);
      for (int i = 0; i < ndof; i++)
      shape(i) = sds[i].Value();
    */
  }

  template <class T_ORTHOPOL>
  void H1HighOrderHex<T_ORTHOPOL> :: CalcDShape (const IntegrationPoint & ip, 
						 FlatMatrix<> dshape) const
  {
    AutoDiff<3> x(ip(0), 0);
    AutoDiff<3> y(ip(1), 1);
    AutoDiff<3> z(ip(2), 2);

    ArrayMem<AutoDiff<3>,40> sds(ndof);
    T_CalcShape<AutoDiff<3>, AutoDiff<3>, AutoDiff<3>, FlatArray<AutoDiff<3> > >
      (x,y,z,sds);
    for (int i = 0; i < ndof; i++)
      for (int j = 0; j < 3; j++)
	dshape(i, j) = sds[i].DValue(j);

    /*
      ArrayMem<AutoDiff<3>,40> sds(ndof);
      CalcShapeDShape (ip, sds);
      for (int i = 0; i < ndof; i++)
      for (int j = 0; j < 3; j++)
      dshape(i, j) = sds[i].DValue(j);
    */
  }




  template<typename T_ORTHOPOL> 
  template<typename Tx, typename Ty, typename Tz, typename TFA>  
  void  H1HighOrderHex<T_ORTHOPOL> :: T_CalcShape (Tx x, Ty y, Tz z, TFA & sds) const
  { 
    Tx lami[8]={(1-x)*(1-y)*(1-z),x*(1-y)*(1-z),x*y*(1-z),(1-x)*y*(1-z),
		(1-x)*(1-y)*z,x*(1-y)*z,x*y*z,(1-x)*y*z}; 
    Tx sigma[8]={(1-x)+(1-y)+(1-z),x+(1-y)+(1-z),x+y+(1-z),(1-x)+y+(1-z),
		 (1-x)+(1-y)+z,x+(1-y)+z,x+y+z,(1-x)+y+z}; 

    // vertex shapes
    for(int i=0; i<8; i++) sds[i] = lami[i]; 
    int ii = 8;

    if (augmented == 1)
      for (int i = 0; i < 8; i++)
	sds[ii++] = T_VERTEXSHAPES::Calc (order, sds[i]);
    else
      if (augmented == 2)
	for (int i = 0; i < 8; i++)
	  {
	    ArrayMem<Tx,20> pol(order+1);
	    LegendrePolynomial (order-1, -1+2*sds[i], pol);	
	    for (int j = 1; j < order; j++)
	      sds[ii++] = pol[j] * sds[i];
	  }

    ArrayMem<Tx,20> polx(order+1), poly(order+1), polz(order+1);
    
    // edge dofs
    const EDGE * edges = ElementTopology::GetEdges (ET_HEX);
    // const POINT3D * points = ElementTopology::GetVertices (ET_HEX);
    for (int i = 0; i < 12; i++)
      if (order_edge[i] >= 2)
	{
	  int p = order_edge[i];
	  int es = edges[i][0], ee = edges[i][1];
	  if (vnums[es] > vnums[ee]) swap (es, ee);
	  
	  Tx xi = sigma[ee]-sigma[es]; 
	  Tx lam_e = lami[es]+lami[ee];
	  T_ORTHOPOL::Calc (p, xi, polx);
	  
	  for (int j = 0; j < p-1; j++)
	    sds[ii++] = lam_e * polx[j];
	}
     
    const FACE * faces = ElementTopology::GetFaces (ET_HEX);
    for (int i = 0; i < 6; i++)
      if (order_face[i] >= 2)
	{
	  int p = order_face[i];

	  Tx lam_f = 0;
	  for (int j = 0; j < 4; j++)
	    lam_f += lami[faces[i][j]];
	  
	  int fmax = 0;
	  for (int j=1; j<4; j++) 
	    if (vnums[faces[i][j]] > vnums[faces[i][fmax]]) fmax = j;  
	  
	  int f1 = faces[i][(fmax+3)%4];
	  int f2 = faces[i][(fmax+1)%4]; 
	  fmax = faces[i][fmax]; 
	  
	  if(vnums[f2]>vnums[f1]) swap(f1,f2);  // fmax > f1 > f2 
	  
	  Tx xi  = sigma[fmax] - sigma[f1]; 
	  Tx eta = sigma[fmax] - sigma[f2];
	  
	  T_ORTHOPOL::Calc (p, xi, polx);	
	  T_ORTHOPOL::Calc (p, eta, poly);	
	  
	  for (int k = 0; k < p-1; k++) 
	    {
	      Tx pxl = polx[k] * lam_f;
	      for (int j = 0; j < p-1; j++) 
		sds[ii++]= pxl * poly[j];
	    }
	}
    
    // volume dofs:
    if (order_inner >= 2)
      {
	int p = order_inner;
	T_ORTHOPOL::Calc (p, 2*x-1, polx);
	T_ORTHOPOL::Calc (p, 2*y-1, poly);
	T_ORTHOPOL::Calc (p, 2*z-1, polz);
	for (int i = 0; i < p-1; i++)
	  for (int j = 0; j < p-1; j++)
	    {
	      Tx pxy = polx[i] * poly[j];
	      for (int k = 0; k < p-1; k++)
		sds[ii++] = pxy * polz[k];
	    }
      }
  }


  /*
    template <class T_ORTHOPOL>
    void H1HighOrderHex<T_ORTHOPOL> :: CalcShapeDShape (const IntegrationPoint & ip, 
    ARRAY<AutoDiff<3> > & sds) const
    { 
    AutoDiff<3> x (ip(0), 0);
    AutoDiff<3> y (ip(1), 1);
    AutoDiff<3> z (ip(2), 2);
    int i, j, k, ii, p;
    AutoDiff<3> fac;

   
    AutoDiff<3> lami[8]={(1-x)*(1-y)*(1-z),x*(1-y)*(1-z),x*y*(1-z),(1-x)*y*(1-z),
    (1-x)*(1-y)*z,x*(1-y)*z,x*y*z,(1-x)*y*z}; 
    AutoDiff<3> sigma[8]={(1-x)+(1-y)+(1-z),x+(1-y)+(1-z),x+y+(1-z),(1-x)+y+(1-z),
    (1-x)+(1-y)+z,x+(1-y)+z,x+y+z,(1-x)+y+z}; 

    // vertex shapes
    for(i=0; i<8; i++) sds[i] = lami[i]; 
    ii = 8;

    if (augmented == 1)
    for (i = 0; i < 8; i++)
    sds[ii++] = T_VERTEXSHAPES::Calc (order, sds[i]);
    else
    if (augmented == 2)
    for (i = 0; i < 8; i++)
    {
    ArrayMem<AutoDiff<3>,20> pol(order+1);
    LegendrePolynomial (order-1, -1+2*sds[i], pol);	
    for (j = 1; j < order; j++)
    sds[ii++] = pol[j] * sds[i];
    }


    ArrayMem<AutoDiff<3>,20> polx(order+1), poly(order+1), polz(order+1);
    
    // edge dofs
    const EDGE * edges = ElementTopology::GetEdges (ET_HEX);
    const POINT3D * points = ElementTopology::GetVertices (ET_HEX);
    for (i = 0; i < 12; i++)
    {
    p = order_edge[i];
    int es = edges[i][0];
    int ee = edges[i][1];
    if (vnums[es] > vnums[ee]) swap (es, ee);

    AutoDiff<3> xi = sigma[ee]-sigma[es]; 
    AutoDiff<3> lam_e = lami[es]+lami[ee];
    T_ORTHOPOL::Calc (p, xi, polx);

    for (j = 0; j < p-1; j++)
    sds[ii++] = lam_e * polx[j];
    }
     
    const FACE * faces = ElementTopology::GetFaces (ET_HEX);
    for (i = 0; i < 6; i++)
    {
    p = order_face[i];
    if (p < 2) continue;

    AutoDiff<3> lam_f = 0;
    for (j = 0; j < 4; j++)
    lam_f += lami[faces[i][j]];
	
    int fmax = 0;
    for (j=1; j<4; j++) 
    if (vnums[faces[i][j]] > vnums[faces[i][fmax]]) fmax = j;  

    int f1 = faces[i][(fmax+3)%4];
    int f2 = faces[i][(fmax+1)%4]; 
    fmax = faces[i][fmax]; 

    if(vnums[f2]>vnums[f1]) swap(f1,f2);  // fmax > f1 > f2 
       
    AutoDiff<3> xi  = sigma[fmax] - sigma[f1]; 
    AutoDiff<3> eta = sigma[fmax] - sigma[f2];

    T_ORTHOPOL::Calc (p, xi, polx);	
    T_ORTHOPOL::Calc (p, eta, poly);	

    for (k = 0; k < p-1; k++) 
    for (j = 0; j < p-1; j++) 
    sds[ii++]= polx[k] * poly[j] * lam_f; 
    }
    
    // volume dofs:
    p = order_inner;
    if (p >= 2)
    {
    T_ORTHOPOL::Calc (p, 2*x-1, polx);
    T_ORTHOPOL::Calc (p, 2*y-1, poly);
    T_ORTHOPOL::Calc (p, 2*z-1, polz);
    for (i = 0; i < p-1; i++)
    for (j = 0; j < p-1; j++)
    {
    AutoDiff<3> pxy = polx[i] * poly[j];
    for (k = 0; k < p-1; k++)
    sds[ii++] = pxy * polz[k];
    }
    }
    }
  */

  /* *********************** Pyramid  **********************/

  template <class T_ORTHOPOL>
  H1HighOrderPyramid<T_ORTHOPOL> :: H1HighOrderPyramid (int aorder)
    : H1HighOrderFiniteElement(3, ET_PYRAMID)
  {
    order_inner = aorder;
    for (int i = 0; i < 8; i++)
      order_edge[i] = aorder;
    for (int i = 0; i < 5; i++)
      order_face[i] = aorder;

    ComputeNDof();
  }


  template <class T_ORTHOPOL>
  void H1HighOrderPyramid<T_ORTHOPOL> :: ComputeNDof()
  {
    ndof = 5;

    if (augmented == 1) ndof *= 2;
    if (augmented == 2) ndof *= order_inner;

    for (int i = 0; i < 8; i++)
      ndof += order_edge[i] - 1;
    for (int i = 0; i < 4; i++)
      ndof += (order_face[i]-1)*(order_face[i]-2) / 2;
    ndof += (order_face[4]-1)*(order_face[4]-1);
    ndof += (order_inner-1)*(order_inner-2)*(2*order_inner-3) / 6;


    order = 1;
    for (int i = 0; i < 8; i++)
      if (order_edge[i] > order)
	order = order_edge[i];
    for (int i = 0; i < 5; i++)
      if (order_face[i] > order)
	order = order_face[i];
    if (order_inner > order)
      order = order_inner;
  }


  template <class T_ORTHOPOL>
  void H1HighOrderPyramid<T_ORTHOPOL> ::  
  GetInternalDofs (ARRAY<int> & idofs) const
  {
    idofs.SetSize (0);

    int base = 5;
    if (augmented == 1) base *= 2;
    if (augmented == 2) base *= order_inner;

    for (int i = 0; i < 8; i++)
      base += order_edge[i] - 1;
    for (int i = 0; i < 4; i++)
      base += (order_face[i]-1)*(order_face[i]-2) / 2;
    base += (order_face[4]-1)*(order_face[4]-1);

    if(order_inner > 2)
      {
	int ni = (order_inner-1)*(order_inner-2)*(2*order_inner-3) / 6;
	for (int i = 0; i < ni; i++)
	  idofs.Append (base+i);
      }
  }

  template <class T_ORTHOPOL>
  void H1HighOrderPyramid<T_ORTHOPOL> :: CalcShape (const IntegrationPoint & ip, 
						    FlatVector<> shape) const
  {
    T_CalcShape<double, double, double, FlatVector<> > (ip(0), ip(1), ip(2), shape); 
    /*
      ArrayMem<AutoDiff<3>, 40> sds(ndof);
      CalcShapeDShape (ip, sds);
      for (int i = 0; i < ndof; i++)
      shape(i) = sds[i].Value();
    */
  }

  template <class T_ORTHOPOL>  
  void H1HighOrderPyramid<T_ORTHOPOL> :: CalcDShape (const IntegrationPoint & ip, 
						     FlatMatrix<> dshape) const
  {
    AutoDiff<3> x(ip(0), 0);
    AutoDiff<3> y(ip(1), 1);
    AutoDiff<3> z(ip(2), 2);

    ArrayMem<AutoDiff<3>,40> sds(ndof);
    T_CalcShape<AutoDiff<3>, AutoDiff<3>, AutoDiff<3>, FlatArray<AutoDiff<3> > >
      (x,y,z,sds);
    for (int i = 0; i < ndof; i++)
      for (int j = 0; j < 3; j++)
	dshape(i, j) = sds[i].DValue(j);
    /*
      ArrayMem<AutoDiff<3>,40> sds(ndof);
      CalcShapeDShape (ip, sds);
      for (int i = 0; i < ndof; i++)
      for (int j = 0; j < 3; j++)
      dshape(i, j) = sds[i].DValue(j);
    */
  }



  template<typename T_ORTHOPOL> 
  template<typename Tx, typename Ty, typename Tz, typename TFA>  
  void  H1HighOrderPyramid<T_ORTHOPOL> :: T_CalcShape (Tx x, Ty y, Tz z, TFA & sds) const
  {
    if (z == 1.) z -= 1e-10;

    Tx xt = x / (1-z);
    Tx yt = y / (1-z);
    
    // Tx mux[4] = { 1-xt, xt, xt, 1-xt };
    // Tx muy[4] = { 1-yt, 1-yt, yt, yt };

    Tx sigma[4]  = { (1-xt)+(1-yt), xt+(1-yt), xt+yt, (1-xt)+yt };
    Tx lambda[4] = { (1-xt)*(1-yt), xt*(1-yt), xt*yt, (1-xt)*yt };

    for (int i = 0; i < 4; i++) 
      sds[i] = lambda[i] * (1-z);
    sds[4] = z;

    int ii = 5;

    if (augmented == 1)
      for (int i = 0; i < 5; i++)
	sds[ii++] = T_VERTEXSHAPES::Calc (order, sds[i]);
    else
      if (augmented == 2)
	for (int i = 0; i < 5; i++)
	  {
	    ArrayMem<Tx,20> pol(order+1);
	    LegendrePolynomial (order-1, -1+2*sds[i], pol);	
	    for (int j = 1; j < order; j++)
	      sds[ii++] = pol[j] * sds[i];
	  }



    //horizontal edge dofs 
    const EDGE * edges = ElementTopology::GetEdges (ET_PYRAMID);
    for (int i = 0; i < 4; i++)
      if (order_edge[i] >= 2)
	{
	  int p = order_edge[i];

	  int es = edges[i][0], ee = edges[i][1];
	  if (vnums[es] > vnums[ee]) swap (es, ee);

	  Tx lam = sigma[ee]-sigma[es]; 
	  Tx lam_edge = lambda[es] + lambda[ee];
	  
	  Tx * pedge = &sds[ii];
	  T_ORTHOPOL::CalcTrigExt (p, lam*(1-z), z, pedge);

	  for (int j = 0; j <= p-2; j++)
	    sds[ii++] *= lam_edge;
	}
    
    // vertical edges
    for (int i = 4; i < 8; i++) 
      if (order_edge[i] >= 2)
	{
	  int es = edges[i][0], ee = edges[i][1];
	  if (vnums[es] > vnums[ee]) swap (es, ee);
	  
	  Tx * pedge = &sds[ii];
	  ii += T_ORTHOPOL::CalcTrigExt (order_edge[i], sds[ee]-sds[es],  
					 1-sds[es]-sds[ee], pedge);
	}


    ArrayMem<Tx,20> polx(order+1), poly(order+1), polz(order+1);
 
    const FACE * faces = ElementTopology::GetFaces (ET_PYRAMID);
    const POINT3D * points = ElementTopology::GetVertices (ET_PYRAMID);

    // trig face dofs
    for (int i = 0; i < 4; i++)
      if (order_face[i] >= 3)
	{
	  int p = order_face[i];
	  Tx lam_face = lambda[faces[i][0]] + lambda[faces[i][1]];  
	  Tx bary[3] = 
	    {(sigma[faces[i][0]]-lam_face)*(1-z), (sigma[faces[i][1]]-lam_face)*(1-z), z}; 
			     
	  int fav[3] = {0, 1, 2};
	  if(vnums[faces[i][fav[0]]] > vnums[faces[i][fav[1]]]) swap(fav[0],fav[1]); 
	  if(vnums[faces[i][fav[1]]] > vnums[faces[i][fav[2]]]) swap(fav[1],fav[2]);
	  if(vnums[faces[i][fav[0]]] > vnums[faces[i][fav[1]]]) swap(fav[0],fav[1]); 	
	 
	  Tx * pface = &sds[ii];
	  
	  int ndf = TrigShapesInnerLegendre::Calc 
	    (p, bary[fav[2]]-bary[fav[1]], bary[fav[0]], pface);
	  
	  for (int j = ii; j < ii+ndf; j++)
	    sds[j] *= lam_face; 
	  ii += ndf;
	}
    
    // quad face dof
    if (order_face[4] >= 2)
      {
	int p = order_face[4];
	Tx fac = 1.0;
	for (int k = 1; k <= p; k++)
	  fac *= (1-z);

	int fmax = 0;
	for (int j=1; j<4; j++) 
	  if (vnums[j] > vnums[fmax]) fmax = j;  

	int f1 = (fmax+3)%4;
	int f2 = (fmax+1)%4; 
	if(vnums[f2]>vnums[f1]) swap(f1,f2);  // fmax > f1 > f2 
       
 	Tx xi  = sigma[fmax] - sigma[f1]; 
	Tx eta = sigma[fmax] - sigma[f2];

	T_ORTHOPOL::Calc (p, xi, polx);	
	T_ORTHOPOL::Calc (p, eta, poly);	

	for (int k = 0; k < p-1; k++) 
	  for (int j = 0; j < p-1; j++) 
	    sds[ii++]= polx[k] * poly[j] * fac; 
      }

    if (order_inner >= 3)
      {
	int p = order_inner;
	LegendrePolynomial (p-2, -1+2*xt, polx);
	LegendrePolynomial (p-2, -1+2*yt, poly);
	Tx bub = xt*(1-xt)*yt*(1-yt)*z*(1-z)*(1-z);
	
	for(int i = 0; i <= p-3; i++)
	  {
	    for (int j = 0; j <= i; j++)
	      {
		Tx bubpj = bub * polx[j];
		for (int k = 0; k <= i; k++)
		  sds[ii++] = bubpj * poly[k];
	      }
	    bub *= 1-z; 
	  }
      }
  }





  /*
    template <class T_ORTHOPOL>
    void H1HighOrderPyramid<T_ORTHOPOL> :: CalcShapeDShape (const IntegrationPoint & ip, 
    ARRAY<AutoDiff<3> > & sds) const
    {
    AutoDiff<3> x (ip(0), 0);
    AutoDiff<3> y (ip(1), 1);
    AutoDiff<3> z (ip(2), 2);
    int i, j, k, ii, p;
    AutoDiff<3> fac;

    if (z.Value() == 1.) 
    z = AutoDiff<3> (1.-1e-10, 2);

    AutoDiff<3> xt = x / (1-z);
    AutoDiff<3> yt = y / (1-z);
    
    AutoDiff<3> mux[4] = { 1-xt, xt, xt, 1-xt };
    AutoDiff<3> muy[4] = { 1-yt, 1-yt, yt, yt };

    AutoDiff<3> sigma[4]={(1-xt)+(1-yt),xt+(1-yt),xt+yt,(1-xt)+yt };

    for (i = 0; i < 4; i++) 
    sds[i] = mux[i] * muy[i] * (1-z);
    sds[4] = z;

    ii = 5;

    if (augmented == 1)
    for (i = 0; i < 5; i++)
    sds[ii++] = T_VERTEXSHAPES::Calc (order, sds[i]);
    else
    if (augmented == 2)
    for (i = 0; i < 5; i++)
    {
    ArrayMem<AutoDiff<3>,20> pol(order+1);
    LegendrePolynomial (order-1, -1+2*sds[i], pol);	
    for (j = 1; j < order; j++)
    sds[ii++] = pol[j] * sds[i];
    }


    ArrayMem<AutoDiff<3>,20> polx(order+1), poly(order+1), polz(order+1);

    //horizontal edge dofs 
    const EDGE * edges = ElementTopology::GetEdges (ET_PYRAMID);
    for (i = 0; i < 4; i++)
    {
    p = order_edge[i];
    int es = edges[i][0];
    int ee = edges[i][1];
    if (vnums[es] > vnums[ee]) swap (es, ee);
    AutoDiff<3> lam = mux[ee]-mux[es] + muy[ee]-muy[es];
    AutoDiff<3> lam_edge = (sds[es] + sds[ee]) / (1-z);

    T_ORTHOPOL::CalcTrigExt (p, lam*(1-z), z, polx);

    for (j = 0; j <= p-2; j++)
    sds[ii++] = lam_edge * polx[j];
    }
    
    // vertical edges
    for (i = 4; i < 8; i++) 
    {
    int es = edges[i][0];
    int ee = edges[i][1];
    if (vnums[es] > vnums[ee]) swap (es, ee);

    AutoDiff<3> * pedge = &sds[ii];
    ii += T_ORTHOPOL::CalcTrigExt (order_edge[i], sds[ee]-sds[es],  
    1-sds[es]-sds[ee], pedge);
    }

 
    const FACE * faces = ElementTopology::GetFaces (ET_PYRAMID);
    const POINT3D * points = ElementTopology::GetVertices (ET_PYRAMID);

    // trig face dofs
    for (i = 0; i < 4; i++)
    {
    p = order_face[i];
    if (p < 3) continue;
    int fav[3] = { faces[i][0], faces[i][1], faces[i][2] };
    if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 
    if(vnums[fav[1]] > vnums[fav[2]]) swap(fav[1],fav[2]);
    if(vnums[fav[0]] > vnums[fav[1]]) swap(fav[0],fav[1]); 	

    AutoDiff<3> lam_face = (sds[faces[i][0]] + sds[faces[i][1]]) / (1-z); //horizontal vertices
    AutoDiff<3> bary[5];
 
    if (lam_face.Value() > 1e-10)
    for (j = 0; j < 4; j++)
    bary[j] = sds[j] / lam_face;
    else
    for (j = 0; j < 4; j++)
    bary[j] = 0.0;

    bary[4] = z;

    AutoDiff<3> * pface = &sds[ii];

    int ndf = TrigShapesInnerLegendre::Calc 
    (p, bary[fav[2]]-bary[fav[1]], bary[fav[0]], pface);

    for (j = ii; j < ii+ndf; j++)
    sds[j] *= lam_face; 
    ii += ndf;
    }

    // quad face dof
    p = order_face[4];
    if (p >= 2)
    {
    AutoDiff<3> fac;
    fac = 1.0;
    for (k = 1; k <= p; k++)
    fac *= (1-z);

    int fmax = 0;
    for (j=1; j<4; j++) 
    if (vnums[j] > vnums[fmax]) fmax = j;  

    int f1 = (fmax+3)%4;
    int f2 = (fmax+1)%4; 
    if(vnums[f2]>vnums[f1]) swap(f1,f2);  // fmax > f1 > f2 
       
    AutoDiff<3> xi  = sigma[fmax] - sigma[f1]; 
    AutoDiff<3> eta = sigma[fmax] - sigma[f2];

    T_ORTHOPOL::Calc (p, xi, polx);	
    T_ORTHOPOL::Calc (p, eta, poly);	

    for (k = 0; k < p-1; k++) 
    for (j = 0; j < p-1; j++) 
    sds[ii++]= polx[k] * poly[j] * fac; 
    }

    p = order_inner;
    if (p >= 3)
    {
    LegendrePolynomial (p-2, -1+2*xt, polx);
    LegendrePolynomial (p-2, -1+2*yt, poly);
    AutoDiff<3> bub = xt*(1-xt)*yt*(1-yt)*z*(1-z)*(1-z);
	
    for(i = 0; i <= p-3; i++)
    {
    for (j = 0; j <= i; j++)
    for (k = 0; k <= i; k++)
    sds[ii++] = bub * polx[j] * poly[k];
    bub *= 1-z;
    }
    }
    }
  */
    







  template class H1HighOrderSegm<IntegratedLegendreMonomialExt>;
  template class H1HighOrderTrig<IntegratedLegendreMonomialExt>;
  template class H1HighOrderQuad<IntegratedLegendreMonomialExt>;
  template class H1HighOrderTet<IntegratedLegendreMonomialExt>;
  template class H1HighOrderPrism<IntegratedLegendreMonomialExt>;
  template class H1HighOrderHex<IntegratedLegendreMonomialExt>;
  template class H1HighOrderPyramid<IntegratedLegendreMonomialExt>;

  template class H1HighOrderSegm<TrigExtensionMonomial>;
  template class H1HighOrderSegm<TrigExtensionOptimal>;
  template class H1HighOrderSegm<TrigExtensionMin>;

  template class H1HighOrderTrig<TrigExtensionMonomial>;
  template class H1HighOrderTrig<TrigExtensionOptimal>;
  template class H1HighOrderTrig<TrigExtensionMin>;

  template class H1HighOrderQuad<TrigExtensionMonomial>;
  template class H1HighOrderQuad<TrigExtensionOptimal>;
  template class H1HighOrderQuad<TrigExtensionMin>;

  template class H1HighOrderTet<TrigExtensionMonomial>;
  template class H1HighOrderTet<TrigExtensionOptimal>;
  template class H1HighOrderTet<TrigExtensionMin>;

  template class H1HighOrderPrism<TrigExtensionMonomial>;
  template class H1HighOrderPrism<TrigExtensionOptimal>;
  template class H1HighOrderPrism<TrigExtensionMin>;

  template class H1HighOrderHex<TrigExtensionMonomial>;
  template class H1HighOrderHex<TrigExtensionOptimal>;
  template class H1HighOrderHex<TrigExtensionMin>;

  template class H1HighOrderPyramid<TrigExtensionMonomial>;
  template class H1HighOrderPyramid<TrigExtensionOptimal>;
  template class H1HighOrderPyramid<TrigExtensionMin>;

}

