diff -rupN gromacs-5.0/CMakeLists.txt gromacs-5.0-dftb-v6-plumed/CMakeLists.txt
--- gromacs-5.0/CMakeLists.txt	2014-06-29 21:49:53.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/CMakeLists.txt	2014-08-18 11:13:06.000000000 +0200
@@ -267,7 +267,7 @@ gmx_option_multichoice(
     GMX_QMMM_PROGRAM
     "QM package for QM/MM"
     None
-    none gaussian mopac gamess orca)
+    none gaussian mopac gamess orca dftb)
 
 gmx_dependent_cache_variable(GMX_SIMD_REF_FLOAT_WIDTH  "Reference SIMD single precision width" STRING "4" "GMX_SIMD STREQUAL REFERENCE")
 gmx_dependent_cache_variable(GMX_SIMD_REF_DOUBLE_WIDTH "Reference SIMD double precision width" STRING "2" "GMX_SIMD STREQUAL REFERENCE")
@@ -676,6 +676,8 @@ elseif(${GMX_QMMM_PROGRAM} STREQUAL "GAM
     set(GMX_QMMM_GAMESS 1)
 elseif(${GMX_QMMM_PROGRAM} STREQUAL "ORCA")
     set(GMX_QMMM_ORCA 1)
+elseif(${GMX_QMMM_PROGRAM} STREQUAL "DFTB")
+    set(GMX_QMMM_DFTB 1)
 elseif(${GMX_QMMM_PROGRAM} STREQUAL "NONE")
     # nothing to do
 else()
diff -rupN gromacs-5.0/Plumed.cmake gromacs-5.0-dftb-v6-plumed/Plumed.cmake
--- gromacs-5.0/Plumed.cmake	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/Plumed.cmake	2015-02-10 09:55:04.839958435 +0100
@@ -0,0 +1,3 @@
+# PLUMED: shared installation
+set(PLUMED_LOAD  /home/tomas/GMX-DFTB/plumed-2.1.1-release/lib/plumed/src/lib/libplumed.so -ldl )
+set(PLUMED_DEPENDENCIES  /home/tomas/GMX-DFTB/plumed-2.1.1-release/lib/plumed/src/lib/libplumed.so)
diff -rupN gromacs-5.0/Plumed.h gromacs-5.0-dftb-v6-plumed/Plumed.h
--- gromacs-5.0/Plumed.h	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/Plumed.h	2014-10-23 17:18:10.000000000 +0200
@@ -0,0 +1,494 @@
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+   Copyright (c) 2011-2014 The plumed team
+   (see the PEOPLE file at the root of the distribution for a list of names)
+
+   See http://www.plumed-code.org for more information.
+
+   This file is part of plumed, version 2.
+
+   plumed is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   plumed is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with plumed.  If not, see <http://www.gnu.org/licenses/>.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+#ifndef __PLUMED_wrapper_Plumed_h
+#define __PLUMED_wrapper_Plumed_h
+
+/**
+\page ReferencePlumedH Reference for interfacing MD codes with PLUMED
+
+  Plumed.h and Plumed.c contain the external plumed interface, which is used to
+  integrate it with MD engines. This interface is very general, and is expected
+  not to change across plumed versions. Plumed.c also implements a dummy version
+  of the interface, so as to allow a code to be fully linked even if the plumed
+  library is not available yet. These files could be directly included in the official
+  host MD distribution. In this manner, it will be sufficient to link the plumed
+  library at link time (on all systems) or directly at runtime (on systems where
+  dynamic loading is enabled) to include plumed features.
+
+  Why is Plumed.c written in C and not C++? The reason is that the resulting Plumed.o
+  needs to be linked with the host MD code immediately (whereas the rest of plumed
+  could be linked a posteriori). Imagine the MD code is written in FORTRAN: when we
+  link the Plumed.o file we would like not to need any C++ library linked. In this
+  manner, we do not need to know which C++ compiler will be used to compile plumed.
+  The C++ library is only linked to the "rest" of plumed, which actually uses it.
+  Anyway, Plumed.c is written in such a manner to allow its compilation also in C++
+  (C++ is a bit stricter than C; compatibility is checked when PlumedStatic.cpp,
+  which basically includes Plumed.c, is compiled with the C++ compiler). This will
+  allow e.g. MD codes written in C++ to just incorporate Plumed.c (maybe renamed into
+  Plumed.cpp), without the need of configuring a plain C compiler.
+
+  Plumed interface can be used from C, C++ and FORTRAN. Everything concerning plumed
+  is hidden inside a single object type, which is described in C by a structure
+  (struct \ref plumed), in C++ by a class (PLMD::Plumed) and in FORTRAN by a
+  fixed-length string (CHARACTER(LEN=32)). Obviously C++ can use both struct
+  and class interfaces, but the first should be preferred. The reference interface
+  is the C one, whereas FORTRAN and C++ interfaces are implemented as wrappers
+  around it.
+
+  In the C++ interface, all the routines are implemented as methods of PLMD::Plumed.
+  In the C and FORTRAN interfaces, all the routines are named plumed_*, to
+  avoid potential name clashes. Notice that the entire plumed library
+  is implemented in C++, and it is hidden inside the PLMD namespace.
+
+  Handlers to the plumed object can be converted among different representations,
+  to allow inter-operability among languages. In C, there are tools to convert
+  to/from FORTRAN, whereas in C++ there are tools to convert to/from FORTRAN and C.
+
+  These handlers only contain a pointer to the real structure, so that
+  when a plumed object is brought from one language to another,
+  it brings a reference to the same environment.
+
+  Moreover, to simplify life in all cases where a single Plumed object is
+  required for the entire simulation (which covers most of the practical
+  applications with conventional MD codes) it is possible to take advantage
+  of a global interface, which is implicitly referring to a unique global instance.
+  The global object should still be initialized and finalized properly.
+
+  The basic method to send a message to plumed is
+\verbatim
+  (C) plumed_cmd
+  (C++) PLMD::Plumed::cmd
+  (FORTRAN)  PLUMED_F_CMD
+\endverbatim
+
+  To initialize a plumed object, use:
+\verbatim
+  (C)        plumed_create
+  (C++)      (constructor of PLMD::Plumed)
+  (FORTRAN)  PLUMED_F_CREATE
+\endverbatim
+
+  To finalize it, use
+\verbatim
+  (C)        plumed_finalize
+  (C++)      (destructor of PLMD::Plumed)
+  (FORTRAN)  PLUMED_F_FINALIZE
+\endverbatim
+
+  To access the global object, use
+\verbatim
+  (C)        plumed_gcreate, plumed_gfinalize, plumed_gcmd
+  (C++)      PLMD::Plumed::gcreate, PLMD::Plumed::gfinalize, PLMD::Plumed::gcmd
+  (FORTRAN)  PLUMED_F_GCREATE, PLUMED_F_GFINALIZE, PLUMED_F_GCMD
+\endverbatim
+
+  To check if the global object has been initialized, use
+\verbatim
+  (C)        plumed_ginitialized
+  (C++)      PLMD::Plumed::ginitialized
+  (FORTRAN)  PLUMED_F_GINITIALIZED
+\endverbatim
+
+  To check if plumed library is available (this is useful for runtime linking), use
+\verbatim
+  (C)        plumed_installed 
+  (C++)      PLMD::Plumed::installed
+  (FORTRAN)  PLUMED_F_INSTALLED
+\endverbatim
+
+  To convert handlers use
+\verbatim
+  (C)        plumed_c2f                 (C to FORTRAN)
+  (C)        plumed_f2c                 (FORTRAN to C)
+  (C++)      Plumed(plumed) constructor (C to C++)
+  (C++)      operator plumed() cast     (C++ to C)
+  (C++)      Plumed(char*)  constructor (FORTRAN to C++)
+  (C++)      toFortran(char*)           (C++ to FORTRAN)
+\endverbatim
+
+\verbatim
+  FORTRAN interface
+    SUBROUTINE PLUMED_F_INSTALLED(i)
+      INTEGER,           INTENT(OUT)   :: i
+    SUBROUTINE PLUMED_F_GINITIALIZED(i)
+      INTEGER,           INTENT(OUT)   :: i
+    SUBROUTINE PLUMED_F_GCREATE()
+    SUBROUTINE PLUMED_F_GCMD(key,val)
+      CHARACTER(LEN=*), INTENT(IN)     :: key
+      UNSPECIFIED_TYPE, INTENT(INOUT)  :: val(*)
+    SUBROUTINE PLUMED_F_GFINALIZE()
+    SUBROUTINE PLUMED_F_GLOBAL(p)
+      CHARACTER(LEN=32), INTENT(OUT)   :: p
+    SUBROUTINE PLUMED_F_CREATE(p)
+      CHARACTER(LEN=32), INTENT(OUT)   :: p
+    SUBROUTINE PLUMED_F_CMD(p,key,val)
+      CHARACTER(LEN=32), INTENT(IN)    :: p
+      CHARACTER(LEN=*),  INTENT(IN)    :: key
+      UNSPECIFIED_TYPE,  INTENT(INOUT) :: val(*)
+    SUBROUTINE PLUMED_F_FINALIZE(p)
+      CHARACTER(LEN=32), INTENT(IN)    :: p
+\endverbatim
+
+  The main routine is "cmd", which accepts two arguments:
+  key is a string containing the name of the command
+  val is the argument. It is declared const so as to allow passing const objects, but in practice plumed
+      is going to modify val in several cases (using a const_cast).
+  In some cases val can be omitted: just pass a NULL pointer (in C++, val is optional and can be omitted).
+  The set of possible keys is the real API of the plumed library, and will be expanded with time.
+  New commands will be added, but backward compatibility will be retained as long as possible.
+
+  To pass plumed a callback function use the following syntax (not available in FORTRAN yet)
+\verbatim
+    plumed_function_holder ff;
+    ff.p=your_function;
+    plumed_cmd(plumed,"xxxx",&ff);
+\endverbatim
+  (this is passing the your_function() function to the "xxxx" command)
+*/
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+/* Generic function pointer */
+typedef void (*plumed_function_pointer)(void);
+
+/**
+  \brief Holder for function pointer.
+
+  To pass plumed a callback function use the following syntax:
+\verbatim
+    plumed_function_holder ff;
+    ff.p=your_function;
+    plumed_cmd(plumed,"xxxx",&ff);
+\endverbatim
+  (this is going to pass the your_function() function to the "xxxx" command)
+*/
+
+typedef struct {
+  plumed_function_pointer p;
+} plumed_function_holder;
+
+/**
+  \brief Main plumed object
+
+  This is an object containing a Plumed instance, which should be used in
+  the MD engine. It should first be initialized with plumed_create(),
+  then it communicates with the MD engine using plumed_cmd(). Finally,
+  before the termination, it should be deallocated with plumed_finalize().
+  Its interface is very simple and general, and is expected
+  not to change across plumed versions. See \ref ReferencePlumedH.
+*/
+typedef struct {
+/**
+  \private
+  \brief Void pointer holding the real PlumedMain structure
+*/
+  void*p;
+} plumed;
+
+/** \relates plumed
+    \brief Constructor
+
+    \return The constructed plumed object
+*/
+plumed plumed_create(void);
+
+/** \relates plumed
+    \brief Tells p to execute a command
+
+    \param p The plumed object on which command is acting
+    \param key The name of the command to be executed
+    \param val The argument. It is declared as const to allow calls like plumed_cmd(p,"A","B"),
+               but for some choice of key it can change the content
+*/
+void plumed_cmd(plumed p,const char*key,const void*val);
+
+/** \relates plumed
+    \brief Destructor
+
+    \param p The plumed object to be deallocated
+*/
+void plumed_finalize(plumed p);
+
+/** \relates plumed
+    \brief Check if plumed is installed (for runtime binding)
+
+    \return 1 if plumed is installed, 0 otherwise
+*/
+int plumed_installed(void);
+
+/** \relates plumed
+    \brief Retrieves a handler to the global structure.
+*/
+plumed plumed_global(void);
+
+/** \relates plumed
+    \brief Check if the global interface has been initialized
+
+    \return 1 if plumed has been initialized, 0 otherwise
+*/
+int plumed_ginitialized(void);
+
+/* global C interface, working on a global object */
+
+/** \relates plumed
+    \brief Constructor for the global interface.
+
+    \note Equivalent to plumed_create(), but initializes a static global plumed object
+*/
+void plumed_gcreate(void);
+
+/** \relates plumed
+    \brief Tells the global interface to execute a command.
+
+    \param key The name of the command to be executed
+    \param val The argument. It is declared as const to allow calls like plumed_gcmd("A","B"),
+               but for some choice of key it can change the content
+
+    \note Equivalent to plumed_cmd(), but skipping the plumed argument
+*/
+void plumed_gcmd(const char* key,const void* val);
+
+/** \relates plumed
+    \brief Destructor for the global interface.
+
+    \note Equivalent to plumed_finalize(), but skipping the plumed argument
+*/
+void plumed_gfinalize(void);
+
+/* routines to convert char handler from/to plumed objects */
+
+/** \related plumed
+    \brief Converts a C handler to a FORTRAN handler
+
+    \param p The C handler
+    \param c The FORTRAN handler (a char[32])
+*/
+void   plumed_c2f(plumed p,char* c);
+
+/** \related plumed
+    \brief Converts a FORTRAN handler to a C handler
+    \param c The FORTRAN handler (a char[32])
+    \return The C handler
+*/
+plumed plumed_f2c(const char* c);
+
+#ifdef __cplusplus
+ }
+#endif
+
+#ifdef __cplusplus
+
+/* this is to include the NULL pointer */
+#include <cstdlib>
+
+/* C++ interface is hidden in PLMD namespace (same as plumed library) */
+namespace PLMD {
+
+/**
+  C++ wrapper for \ref plumed.
+
+  This class provides a C++ interface to PLUMED.
+*/
+
+class Plumed{
+  plumed main;
+/**
+   Keeps track of whether the object was created from scratch using
+   the default constructor (cloned=false) or whether it was imported
+   from C or FORTRAN (cloned=true). In the latter case, the
+   plumed_finalize() method is not called when destructing the object,
+   since it is expected to be finalized in the C/FORTRAN code
+*/
+  bool cloned;
+public:
+/**
+   Check if plumed is installed (for runtime binding)
+   \return true if plumed is installed, false otherwise
+*/
+  static bool installed();
+/**
+   Check if global-plumed has been initialized
+   \return true if global plumed object (see global()) is initialized (i.e. if gcreate() has been
+           called), false otherwise.
+*/
+  static bool ginitialized();
+/**
+   Initialize global-plumed
+*/
+  static void gcreate();
+/**
+   Send a command to global-plumed
+    \param key The name of the command to be executed
+    \param val The argument. It is declared as const to allow calls like gcmd("A","B"),
+               but for some choice of key it can change the content
+*/
+  static void gcmd(const char* key,const void* val);
+/**
+   Finalize global-plumed
+*/
+  static void gfinalize();
+/**
+   Returns the Plumed global object
+   \return The Plumed global object
+*/
+  static Plumed global();
+/**
+   Constructor
+*/
+  Plumed();
+/**
+   Clone a Plumed object from a FORTRAN char* handler
+   \param c The FORTRAN handler (a char[32]).
+
+ \attention The Plumed object created in this manner
+            will not finalize the corresponding plumed structure.
+            It is expected that the FORTRAN code calls PLUMED_F_FINALIZE for it
+*/
+  Plumed(const char*c);
+/**
+   Clone a Plumed object from a C plumed structure
+   \param p The C plumed structure.
+
+ \attention The Plumed object created in this manner
+            will not finalize the corresponding plumed structure.
+            It is expected that the C code calls plumed_finalize for it
+*/
+  Plumed(plumed p);
+private:
+/** Copy constructor is disabled (private and unimplemented)
+  The problem here is that after copying it will not be clear who is
+  going to finalize the corresponding plumed structure.
+*/
+  Plumed(const Plumed&);
+/** Assignment operator is disabled (private and unimplemented)
+  The problem here is that after copying it will not be clear who is
+  going to finalize the corresponding plumed structure.
+*/
+  Plumed&operator=(const Plumed&);
+public:
+/**
+   Retrieve the C plumed structure for this object
+*/
+  operator plumed()const;
+/**
+   Retrieve a FORTRAN handler for this object
+    \param c The FORTRAN handler (a char[32]).
+*/
+  void toFortran(char*c)const;
+/**
+   Send a command to this plumed object
+    \param key The name of the command to be executed
+    \param val The argument. It is declared as const to allow calls like p.cmd("A","B"),
+               but for some choice of key it can change the content
+*/
+  void cmd(const char*key,const void*val=NULL);
+/**
+   Destructor
+
+   Destructor is virtual so as to allow correct inheritance from Plumed object.
+   To avoid linking problems with g++, I specify "inline" also here (in principle
+   it should be enough to specify it down in the definition of the function, but
+   for some reason that I do not understand g++ does not inline it properly in that
+   case and complains when Plumed.h is included but Plumed.o is not linked. Anyway, the
+   way it is done here seems to work properly).
+*/
+  inline virtual ~Plumed();
+};
+
+/* All methods are inlined so as to avoid the compilation of an extra c++ file */
+
+inline
+bool Plumed::installed(){
+  return plumed_installed();
+}
+
+inline
+Plumed::Plumed():
+  main(plumed_create()),
+  cloned(false)
+{}
+
+inline
+Plumed::Plumed(const char*c):
+  main(plumed_f2c(c)),
+  cloned(true)
+{}
+
+inline
+Plumed::Plumed(plumed p):
+  main(p),
+  cloned(true)
+{}
+
+inline
+Plumed::operator plumed()const{
+  return main;
+}
+
+inline
+void Plumed::toFortran(char*c)const{
+  plumed_c2f(main,c);
+}
+
+inline
+void Plumed::cmd(const char*key,const void*val){
+  plumed_cmd(main,key,val);
+}
+
+inline
+Plumed::~Plumed(){
+  if(!cloned)plumed_finalize(main);
+}
+
+inline
+bool Plumed::ginitialized(){
+  return plumed_ginitialized();
+}
+
+inline
+void Plumed::gcreate(){
+  plumed_gcreate();
+}
+
+inline
+void Plumed::gcmd(const char* key,const void* val){
+  plumed_gcmd(key,val);
+}
+
+inline
+void Plumed::gfinalize(){
+  plumed_gfinalize();
+}
+
+inline
+Plumed Plumed::global(){
+  return plumed_global();
+}
+
+}
+
+#endif
+
+
+#endif
diff -rupN gromacs-5.0/Plumed.inc gromacs-5.0-dftb-v6-plumed/Plumed.inc
--- gromacs-5.0/Plumed.inc	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/Plumed.inc	2015-02-10 09:55:19.703924666 +0100
@@ -0,0 +1,3 @@
+# PLUMED: shared installation (library path kept identical to Plumed.cmake)
+PLUMED_LOAD= /home/tomas/GMX-DFTB/plumed-2.1.1-release/lib/plumed/src/lib/libplumed.so -ldl
+PLUMED_DEPENDENCIES= /home/tomas/GMX-DFTB/plumed-2.1.1-release/lib/plumed/src/lib/libplumed.so
diff -rupN gromacs-5.0/src/config.h.cmakein gromacs-5.0-dftb-v6-plumed/src/config.h.cmakein
--- gromacs-5.0/src/config.h.cmakein	2014-06-17 17:14:19.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/config.h.cmakein	2014-08-18 11:16:54.000000000 +0200
@@ -172,6 +172,9 @@
 /* Use ORCA for QM-MM calculations */
 #cmakedefine GMX_QMMM_ORCA
 
+/* Use built-in DFTB for QM-MM calculations */
+#cmakedefine GMX_QMMM_DFTB
+
 /* Use the GROMACS software 1/sqrt(x) */
 #cmakedefine GMX_SOFTWARE_INVSQRT
 
diff -rupN gromacs-5.0/src/gromacs/CMakeLists.txt gromacs-5.0-dftb-v6-plumed/src/gromacs/CMakeLists.txt
--- gromacs-5.0/src/gromacs/CMakeLists.txt	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/CMakeLists.txt	2015-02-10 13:59:34.738135370 +0100
@@ -32,6 +32,8 @@
 # To help us fund GROMACS development, we humbly ask that you cite
 # the research papers on the package. Check out http://www.gromacs.org.
 
+include(${CMAKE_SOURCE_DIR}/Plumed.cmake)
+
 set(LIBGROMACS_SOURCES)
 
 function (gmx_install_headers DESTINATION)
@@ -160,6 +162,8 @@ if(GMX_USE_GCC44_BUG_WORKAROUND)
 endif()
 
 add_library(libgromacs ${LIBGROMACS_SOURCES})
+#add_library(liblevmar "/home/tomas/GMX-DFTB/levmar-2.6/liblevmar.so")
+#SET_TARGET_PROPERTIES(liblevmar PROPERTIES LINKER_LANGUAGE C)
 if (GMX_GIT_VERSION_INFO)
     add_dependencies(libgromacs gmx-version)
 endif()
@@ -193,7 +197,7 @@ target_link_libraries(libgromacs
                       ${TNG_IO_LIBRARIES}
                       ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES}
                       ${XML_LIBRARIES}
-                      ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS})
+                      ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS} ${PLUMED_LOAD}) # liblevmar)
 set_target_properties(libgromacs PROPERTIES
                       OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}"
                       SOVERSION ${LIBRARY_SOVERSION}
diff -rupN gromacs-5.0/src/gromacs/fileio/tpxio.c gromacs-5.0-dftb-v6-plumed/src/gromacs/fileio/tpxio.c
--- gromacs-5.0/src/gromacs/fileio/tpxio.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/fileio/tpxio.c	2014-09-02 22:22:50.000000000 +0200
@@ -1768,6 +1768,18 @@ static void do_inputrec(t_fileio *fio, t
         gmx_fio_do_gmx_bool(fio, ir->bQMMM);
         gmx_fio_do_int(fio, ir->QMMMscheme);
         gmx_fio_do_real(fio, ir->scalefactor);
+        gmx_fio_do_int(fio, ir->QMdftbsccmode);
+        gmx_fio_do_real(fio, ir->QMdftbtelec);
+        gmx_fio_do_string(fio, ir->QMdftbslkopath);
+        gmx_fio_do_string(fio, ir->QMdftbslkoseparator);
+        gmx_fio_do_gmx_bool(fio, ir->QMdftbslkolowercase);
+        gmx_fio_do_string(fio, ir->QMdftbslkosuffix);
+        gmx_fio_do_int(fio, ir->QMdftbpartialpme);
+        gmx_fio_do_int(fio, ir->QMdftbdispersion);
+        gmx_fio_do_int(fio, ir->QMdftbcdko);
+        if (ir->QMdftbcdko > 0) {
+            gmx_fio_do_int(fio, ir->QMdftbmmhubinf);
+        }
         gmx_fio_do_int(fio, ir->opts.ngQM);
         if (bRead)
         {
diff -rupN gromacs-5.0/src/gromacs/gmxlib/gmx_omp_nthreads.c gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/gmx_omp_nthreads.c
--- gromacs-5.0/src/gromacs/gmxlib/gmx_omp_nthreads.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/gmx_omp_nthreads.c	2014-08-18 11:19:58.000000000 +0200
@@ -337,6 +337,8 @@ void gmx_omp_nthreads_init(FILE *fplog,
         {
             modth.gnth = 1;
         }
+        /* Tomas Kubar */
+        printf("Insider info: modth.gnth == %d\n", modth.gnth);
 
         if (bSepPME)
         {
diff -rupN gromacs-5.0/src/gromacs/gmxlib/mvdata.c gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/mvdata.c
--- gromacs-5.0/src/gromacs/gmxlib/mvdata.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/mvdata.c	2014-08-18 11:20:56.000000000 +0200
@@ -792,12 +792,14 @@ static void bc_atomtypes(const t_commrec
     snew_bc(cr, atomtypes->surftens, nr);
     snew_bc(cr, atomtypes->gb_radius, nr);
     snew_bc(cr, atomtypes->S_hct, nr);
+    snew_bc(cr, atomtypes->atomnumber, nr);
 
     nblock_bc(cr, nr, atomtypes->radius);
     nblock_bc(cr, nr, atomtypes->vol);
     nblock_bc(cr, nr, atomtypes->surftens);
     nblock_bc(cr, nr, atomtypes->gb_radius);
     nblock_bc(cr, nr, atomtypes->S_hct);
+    nblock_bc(cr, nr, atomtypes->atomnumber);
 }
 
 
diff -rupN gromacs-5.0/src/gromacs/gmxlib/nonbonded/nb_generic.c gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/nonbonded/nb_generic.c
--- gromacs-5.0/src/gromacs/gmxlib/nonbonded/nb_generic.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/nonbonded/nb_generic.c	2014-08-19 15:02:55.000000000 +0200
@@ -51,6 +51,8 @@
 #include "nonbonded.h"
 #include "nb_kernel.h"
 
+#include <stdio.h>
+
 void
 gmx_nb_generic_kernel(t_nblist *                nlist,
                       rvec *                    xx,
@@ -468,10 +470,13 @@ gmx_nb_generic_kernel(t_nblist *
         fshift[is3]      = fshift[is3]+fix;
         fshift[is3+1]    = fshift[is3+1]+fiy;
         fshift[is3+2]    = fshift[is3+2]+fiz;
+        if ((fix>1.e10) || (fix<-1.e10) || (fiy>1.e10) || (fiy<-1.e10) || (fiz>1.e10) || (fiz<-1.e10))
+          printf("  PROBLEM ATOM %d - FORCE/SHIFT TOO LARGE\n", ii+1);
         ggid             = nlist->gid[n];
         velecgrp[ggid]  += vctot;
         vvdwgrp[ggid]   += vvdwtot;
     }
+    for (is3=0; is3<SHIFTS; is3++) printf("GENERIC NB: SHIFT[%2d] = %f\n", is3, fshift[is3]);
     /* Estimate flops, average for generic kernel:
      * 12 flops per outer iteration
      * 50 flops per inner iteration
diff -rupN gromacs-5.0/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3W3_c.c gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3W3_c.c
--- gromacs-5.0/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3W3_c.c	2014-06-17 17:14:19.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3W3_c.c	2014-09-08 15:25:21.000000000 +0200
@@ -96,12 +96,15 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
     int              *vdwtype;
     real             *vdwparam;
-    int              ewitab;
+    int              ewitab, ewitab_max, iinr_max, jjnr_max;
+    real             shX_max, shY_max, shZ_max, ix0_max, iy0_max, iz0_max, jx0_max, jy0_max, jz0_max;
     real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
     real             *ewtab;
 
+   
     x                = xx[0];
     f                = ff[0];
+    ewitab_max = 0;
 
     nri              = nlist->nri;
     iinr             = nlist->iinr;
@@ -122,6 +125,10 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
     ewtabscale       = fr->ic->tabq_scale;
     ewtabhalfspace   = 0.5/ewtabscale;
 
+  //printf("inside jindex =");
+  //for (iidx=0; iidx< 10; iidx++) printf(" %d", jindex[iidx]);
+  //printf("\n");
+
     /* Setup water-specific parameters */
     inr              = nlist->iinr[0];
     iq0              = facel*charge[inr+0];
@@ -279,6 +286,20 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
             /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
             ewrt             = r00*ewtabscale;
             ewitab           = ewrt;
+            if (ewitab > ewitab_max) {
+              ewitab_max = ewitab;
+              shX_max    = shX;
+              shY_max    = shY;
+              shZ_max    = shZ;
+              ix0_max    = ix0;
+              iy0_max    = iy0;
+              iz0_max    = iz0;
+              jx0_max    = jx0;
+              jy0_max    = jy0;
+              jz0_max    = jz0;
+              iinr_max   = iinr[iidx];
+              jjnr_max   = jjnr[jidx];
+            }
             eweps            = ewrt-ewitab;
             ewitab           = 4*ewitab;
             felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
@@ -303,6 +324,9 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
             tx               = fscal*dx00;
             ty               = fscal*dy00;
             tz               = fscal*dz00;
+      //if (tx > 1000000. || tx < -1000000.) printf("\nLINE306 IIDX=%d JIDX=%d Tx = %f EWITAB = %d R = %f\n", iidx, jidx, tx, ewitab/4, r00);
+      //if (ty > 1000000. || ty < -1000000.) printf("\nLINE307 IIDX=%d JIDX=%d Ty = %f EWITAB = %d R = %f\n", iidx, jidx, ty, ewitab/4, r00);
+      //if (tz > 1000000. || tz < -1000000.) printf("\nLINE308 IIDX=%d JIDX=%d Tz = %f EWITAB = %d R = %f\n", iidx, jidx, tz, ewitab/4, r00);
 
             /* Update vectorial force */
             fix0            += tx;
@@ -338,6 +362,9 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
             tx               = fscal*dx01;
             ty               = fscal*dy01;
             tz               = fscal*dz01;
+      //if (tx > 1000000. || tx < -1000000.) printf("\nLINE344 IIDX=%d JIDX=%d Tx = %f EWITAB = %d R = %f\n", iidx, jidx, tx, ewitab/4, r01);
+      //if (ty > 1000000. || ty < -1000000.) printf("\nLINE345 IIDX=%d JIDX=%d Ty = %f EWITAB = %d R = %f\n", iidx, jidx, ty, ewitab/4, r01);
+      //if (tz > 1000000. || tz < -1000000.) printf("\nLINE346 IIDX=%d JIDX=%d Tz = %f EWITAB = %d R = %f\n", iidx, jidx, tz, ewitab/4, r01);
 
             /* Update vectorial force */
             fix0            += tx;
@@ -373,6 +400,9 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
             tx               = fscal*dx02;
             ty               = fscal*dy02;
             tz               = fscal*dz02;
+      //if (tx > 1000000. || tx < -1000000.) printf("\nLINE382 IIDX=%d JIDX=%d Tx = %f EWITAB = %d R = %f\n", iidx, jidx, tx, ewitab/4, r02);
+      //if (ty > 1000000. || ty < -1000000.) printf("\nLINE383 IIDX=%d JIDX=%d Ty = %f EWITAB = %d R = %f\n", iidx, jidx, ty, ewitab/4, r02);
+      //if (tz > 1000000. || tz < -1000000.) printf("\nLINE384 IIDX=%d JIDX=%d Tz = %f EWITAB = %d R = %f\n", iidx, jidx, tz, ewitab/4, r02);
 
             /* Update vectorial force */
             fix0            += tx;
@@ -408,6 +438,9 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
             tx               = fscal*dx10;
             ty               = fscal*dy10;
             tz               = fscal*dz10;
+      //if (tx > 1000000. || tx < -1000000.) printf("\nLINE420 IIDX=%d JIDX=%d Tx = %f EWITAB = %d R = %f\n", iidx, jidx, tx, ewitab/4, r10);
+      //if (ty > 1000000. || ty < -1000000.) printf("\nLINE421 IIDX=%d JIDX=%d Ty = %f EWITAB = %d R = %f\n", iidx, jidx, ty, ewitab/4, r10);
+      //if (tz > 1000000. || tz < -1000000.) printf("\nLINE422 IIDX=%d JIDX=%d Tz = %f EWITAB = %d R = %f\n", iidx, jidx, tz, ewitab/4, r10);
 
             /* Update vectorial force */
             fix1            += tx;
@@ -443,6 +476,9 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
             tx               = fscal*dx11;
             ty               = fscal*dy11;
             tz               = fscal*dz11;
+      //if (tx > 1000000. || tx < -1000000.) printf("\nLINE458 IIDX=%d JIDX=%d Tx = %f EWITAB = %d R = %f\n", iidx, jidx, tx, ewitab/4, r11);
+      //if (ty > 1000000. || ty < -1000000.) printf("\nLINE459 IIDX=%d JIDX=%d Ty = %f EWITAB = %d R = %f\n", iidx, jidx, ty, ewitab/4, r11);
+      //if (tz > 1000000. || tz < -1000000.) printf("\nLINE460 IIDX=%d JIDX=%d Tz = %f EWITAB = %d R = %f\n", iidx, jidx, tz, ewitab/4, r11);
 
             /* Update vectorial force */
             fix1            += tx;
@@ -478,6 +514,9 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
             tx               = fscal*dx12;
             ty               = fscal*dy12;
             tz               = fscal*dz12;
+      //if (tx > 1000000. || tx < -1000000.) printf("\nLINE496 IIDX=%d JIDX=%d Tx = %f EWITAB = %d R = %f\n", iidx, jidx, tx, ewitab/4, r12);
+      //if (ty > 1000000. || ty < -1000000.) printf("\nLINE497 IIDX=%d JIDX=%d Ty = %f EWITAB = %d R = %f\n", iidx, jidx, ty, ewitab/4, r12);
+      //if (tz > 1000000. || tz < -1000000.) printf("\nLINE498 IIDX=%d JIDX=%d Tz = %f EWITAB = %d R = %f\n", iidx, jidx, tz, ewitab/4, r12);
 
             /* Update vectorial force */
             fix1            += tx;
@@ -513,6 +552,9 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
             tx               = fscal*dx20;
             ty               = fscal*dy20;
             tz               = fscal*dz20;
+      //if (tx > 1000000. || tx < -1000000.) printf("\nLINE534 IIDX=%d JIDX=%d Tx = %f EWITAB = %d R = %f\n", iidx, jidx, tx, ewitab/4, r20);
+      //if (ty > 1000000. || ty < -1000000.) printf("\nLINE535 IIDX=%d JIDX=%d Ty = %f EWITAB = %d R = %f\n", iidx, jidx, ty, ewitab/4, r20);
+      //if (tz > 1000000. || tz < -1000000.) printf("\nLINE536 IIDX=%d JIDX=%d Tz = %f EWITAB = %d R = %f\n", iidx, jidx, tz, ewitab/4, r20);
 
             /* Update vectorial force */
             fix2            += tx;
@@ -548,6 +590,9 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
             tx               = fscal*dx21;
             ty               = fscal*dy21;
             tz               = fscal*dz21;
+      //if (tx > 1000000. || tx < -1000000.) printf("\nLINE572 IIDX=%d JIDX=%d Tx = %f EWITAB = %d R = %f\n", iidx, jidx, tx, ewitab/4, r21);
+      //if (ty > 1000000. || ty < -1000000.) printf("\nLINE573 IIDX=%d JIDX=%d Ty = %f EWITAB = %d R = %f\n", iidx, jidx, ty, ewitab/4, r21);
+      //if (tz > 1000000. || tz < -1000000.) printf("\nLINE574 IIDX=%d JIDX=%d Tz = %f EWITAB = %d R = %f\n", iidx, jidx, tz, ewitab/4, r21);
 
             /* Update vectorial force */
             fix2            += tx;
@@ -583,6 +628,13 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
             tx               = fscal*dx22;
             ty               = fscal*dy22;
             tz               = fscal*dz22;
+      //if (tx > 1000000. || tx < -1000000.) printf("\nLINE610 IIDX=%d JIDX=%d Tx = %f EWITAB = %d R = %f\n", iidx, jidx, tx, ewitab/4, r22);
+      //if (ty > 1000000. || ty < -1000000.) printf("\nLINE611 IIDX=%d JIDX=%d Ty = %f EWITAB = %d R = %f\n", iidx, jidx, ty, ewitab/4, r22);
+      //if (tz > 1000000. || tz < -1000000.) { printf("\nLINE612 IIDX=%d JIDX=%d Tz = %f EWITAB = %d R = %f\n", iidx, jidx, tz, ewitab/4, r22);
+      //    printf("\nATOMS %d and %d\n\n", inr+1, jnr+1);
+      //    printf("    %10f %10f %10f %10f %10f %10f\n", qq22, ewtabscale, ewtabhalfspace, ewtab[ewitab], ewtab[ewitab+1], ewtab[ewitab+2]);
+      //    printf("    %10f %10f %10f %10f %10f %10d %10f %10f\n", r22, rsq22, rinv22, ewrt, eweps, ewitab, velec, felec);
+      //}
 
             /* Update vectorial force */
             fix2            += tx;
@@ -626,9 +678,18 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
 
         /* Increment number of inner iterations */
         inneriter                  += j_index_end - j_index_start;
+      //printf(" %d", j_index_end - j_index_start);
 
         /* Outer loop uses 32 flops */
     }
+  //printf("\n");
+
+  //for (iidx=0; iidx<nri; iidx++)
+  //{
+  //    if (ff[iidx][0] > 1000000. || ff[iidx][0] < -1000000.) printf("W3W3: F[%d][0] = %f\n", iidx, ff[iidx][0]);
+  //    if (ff[iidx][1] > 1000000. || ff[iidx][1] < -1000000.) printf("W3W3: F[%d][1] = %f\n", iidx, ff[iidx][1]);
+  //    if (ff[iidx][2] > 1000000. || ff[iidx][2] < -1000000.) printf("W3W3: F[%d][2] = %f\n", iidx, ff[iidx][2]);
+  //}
 
     /* Increment number of outer iterations */
     outeriter        += nri;
@@ -636,6 +697,15 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
     /* Update outer/inner flops */
 
     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*372);
+  //printf("IN FUNCTION nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c()");
+  //printf(" %d %d %d %d %f %f %f %f %f %f %f %f\n", ewitab_max, iinr_max, jjnr_max, jindex[nri],
+  //      shX_max, shY_max, shZ_max, ix0_max-shX_max-jx0_max, iy0_max-shY_max-jy0_max, iz0_max-shZ_max-jz0_max,
+  //      sqrt((ix0_max-shX_max-jx0_max)*(ix0_max-shX_max-jx0_max)+ (iy0_max-shY_max-jy0_max)*(iy0_max-shY_max-jy0_max)+
+  //                          (iz0_max-shZ_max-jz0_max)*(iz0_max-shZ_max-jz0_max)),
+  //      sqrt((ix0_max-jx0_max)*(ix0_max-jx0_max)+ (iy0_max-jy0_max)*(iy0_max-jy0_max)+(iz0_max-jz0_max)*(iz0_max-jz0_max)));
+  //printf("inside jindex =");
+  //for (iidx=0; iidx< 10; iidx++) printf(" %d", jindex[iidx]);
+  //printf("\n");
 }
 /*
  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c
diff -rupN gromacs-5.0/src/gromacs/gmxlib/nonbonded/nb_kernel.c gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/nonbonded/nb_kernel.c
--- gromacs-5.0/src/gromacs/gmxlib/nonbonded/nb_kernel.c	2014-06-17 17:14:19.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/nonbonded/nb_kernel.c	2014-09-04 15:52:38.000000000 +0200
@@ -99,8 +99,18 @@ nb_kernel_list_hash_init(void)
     {
         kernel_list_hash[i] = -1;
     }
+    //printf("KERNELS HASHING:\n");
     for (i = 0; i < kernel_list_size; i++)
     {
+        /* Debug dump of the kernel table; disabled so that hashing does not write to stdout. */
+        //printf("%d %s %s %s %s %s %s %s %s\n", i, kernel_list[i].architecture,
+        //                            kernel_list[i].electrostatics,
+        //                            kernel_list[i].electrostatics_modifier,
+        //                            kernel_list[i].vdw,
+        //                            kernel_list[i].vdw_modifier,
+        //                            kernel_list[i].geometry,
+        //                            kernel_list[i].other,
+        //                            kernel_list[i].vf);
         index = nb_kernel_hash_func(kernel_list[i].architecture,
                                     kernel_list[i].electrostatics,
                                     kernel_list[i].electrostatics_modifier,
@@ -118,6 +128,7 @@ nb_kernel_list_hash_init(void)
 
         kernel_list_hash[index] = i;
     }
+    //printf("END KERNELS HASHING\n");
     return 0;
 }
 
@@ -172,6 +183,7 @@ nb_kernel_list_findkernel(FILE gmx_unuse
             !gmx_strcasecmp_min(kernel_list[i].vf, vf))
         {
             kernelinfo_ptr = kernel_list+i;
+            //printf("%d at address %p\n", i, (void *) kernelinfo_ptr->kernelptr); /* debug only: would print on every successful lookup */
             break;
         }
         index = (index+1) % kernel_list_hash_size;
diff -rupN gromacs-5.0/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_sse4_1_double.c gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_sse4_1_double.c
--- gromacs-5.0/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_sse4_1_double.c	2014-06-17 17:14:19.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_sse4_1_double.c	2014-09-03 00:34:52.000000000 +0200
@@ -49,6 +49,8 @@
 #include "gromacs/simd/math_x86_sse4_1_double.h"
 #include "kernelutil_x86_sse4_1_double.h"
 
+#include<stdio.h>
+
 /*
  * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse4_1_double
  * Electrostatics interaction: Ewald
@@ -1213,6 +1215,13 @@ nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sse4_
     /* Increment number of outer iterations */
     outeriter        += nri;
 
+    /* Debug scan for very large forces; kept disabled, as in the plain-C W3W3 kernel. */
+    //for (iidx=0; iidx<nri; iidx++)
+    //{
+    //    if (ff[iidx][0] > 1000000. || ff[iidx][0] < -1000000.) printf("W3W3: F[%d][0] = %f\n", iidx, ff[iidx][0]);
+    //    if (ff[iidx][1] > 1000000. || ff[iidx][1] < -1000000.) printf("W3W3: F[%d][1] = %f\n", iidx, ff[iidx][1]);
+    //    if (ff[iidx][2] > 1000000. || ff[iidx][2] < -1000000.) printf("W3W3: F[%d][2] = %f\n", iidx, ff[iidx][2]);
+    //}
     /* Update outer/inner flops */
 
     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*381);
diff -rupN gromacs-5.0/src/gromacs/gmxlib/nonbonded/nonbonded.c gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/nonbonded/nonbonded.c
--- gromacs-5.0/src/gromacs/gmxlib/nonbonded/nonbonded.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/nonbonded/nonbonded.c	2014-09-04 23:46:45.000000000 +0200
@@ -269,11 +269,16 @@ gmx_nonbonded_set_kernel_pointers(FILE *
 
         for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
         {
+        //printf("HINT 4\n");
             nl->kernelptr_vf       = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
+    //printf("LOOKING FOR KERNEL:\n    %s %s %s %s %s %s %s\n",
+    //   arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other);
+
         }
         for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
         {
+        //printf("HINT 4a\n");
             nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 
@@ -310,9 +315,15 @@ gmx_nonbonded_set_kernel_pointers(FILE *
          */
         if (nl->kernelptr_vf == NULL && !gmx_strcasecmp_min(geom, "Particle-Particle"))
         {
+            /* No kernel pointer was set above: fall back to the generic kernel. */
             nl->kernelptr_vf       = (void *) gmx_nb_generic_kernel;
             nl->kernelptr_f        = (void *) gmx_nb_generic_kernel;
             nl->simd_padding_width = 1;
+            fprintf(stderr, /* warnings go to stderr; %p needs a void pointer, hence the cast */
+                    "WARNING - Slow generic NB kernel used for neighborlist with\n"
+                    "    Elec: '%s', Modifier: '%s'\n    Vdw:  '%s', Modifier: '%s'\n"
+                    "    kernel address: %p\n",
+                    elec, elec_mod, vdw, vdw_mod, (void *) gmx_nb_generic_kernel);
             if (debug)
             {
                 fprintf(debug,
@@ -323,6 +334,7 @@ gmx_nonbonded_set_kernel_pointers(FILE *
             }
         }
     }
+    //printf("kernelptr_vf == %p\n", nl->kernelptr_vf);
     return;
 }
 
@@ -333,12 +345,15 @@ void do_nonbonded(t_forcerec *fr,
                   int nls, int eNL, int flags)
 {
     t_nblist *        nlist;
-    int               n, n0, n1, i, i0, i1, sz, range;
+    int               n, n0, n1, i, i0, i1, sz, range, k;
     t_nblists *       nblists;
     nb_kernel_data_t  kernel_data;
     nb_kernel_t *     kernelptr = NULL;
     rvec *            f;
 
+    /* All trace output in this routine goes to the debug stream when it is open, never to stdout. */
+    if (debug) fprintf(debug, "IN  DO_NONBONDED()\n  with %d NB lists\n", fr->nnblists);
+
     kernel_data.flags                   = flags;
     kernel_data.exclusions              = excl;
     kernel_data.lambda                  = lambda;
@@ -350,6 +365,7 @@ void do_nonbonded(t_forcerec *fr,
         return;
     }
 
+    if (debug) fprintf(debug, "eNL = %d\n", eNL);
     if (eNL >= 0)
     {
         i0 = eNL;
@@ -361,6 +377,7 @@ void do_nonbonded(t_forcerec *fr,
         i1 = eNL_NR;
     }
 
+    if (debug) fprintf(debug, "nls = %d\n", nls);
     if (nls >= 0)
     {
         n0 = nls;
@@ -380,8 +397,10 @@ void do_nonbonded(t_forcerec *fr,
         kernel_data.table_vdw               = &nblists->table_vdw;
         kernel_data.table_elec_vdw          = &nblists->table_elec_vdw;
 
+        if (debug) fprintf(debug, "NL number %d\n", n);
         for (range = 0; range < 2; range++)
         {
+            if (debug) fprintf(debug, "NL range  %d\n", range);
             /* Are we doing short/long-range? */
             if (range == 0)
             {
@@ -412,8 +431,11 @@ void do_nonbonded(t_forcerec *fr,
 
             for (i = i0; (i < i1); i++)
             {
+                if (debug) fprintf(debug, "interaction %d : nri = %d", i, nlist[i].nri);
                 if (nlist[i].nri > 0)
                 {
+                    if (debug) fprintf(debug, " j's =");
+                    for (k = 0; debug && k < nlist[i].nri && k < 10; k++) fprintf(debug, " %d", nlist[i].jindex[k]); /* at most the first 10 entries */
                     if (flags & GMX_NONBONDED_DO_POTENTIAL)
                     {
                         /* Potential and force */
@@ -433,16 +455,30 @@ void do_nonbonded(t_forcerec *fr,
                     /* Neighborlists whose kernelptr==NULL will always be empty */
                     if (kernelptr != NULL)
                     {
+                        //printf("TEST FSHIFT 22 BEFOR KERN n=%d, i=%d: %10.1f %10.1f %10.1f", n, i, fr->fshift[22][XX], fr->fshift[22][YY], fr->fshift[22][ZZ]);
+                        if (debug) fprintf(debug, "     KERNEL = %p, atoms = %d \n", (void *) kernelptr, nlist[i].nri); /* %p requires a void pointer */
                         (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
+                        //printf(" AFTER KERN: %10.1f %10.1f %10.1f\n", fr->fshift[22][XX], fr->fshift[22][YY], fr->fshift[22][ZZ]);
+			//fflush(stdout);
+                        /*{
+			    FILE *file;
+			    int tom;
+			    file = fopen("coordinates.xvg", "w");
+			    for (tom=0; tom<fr->natoms_force_constr; tom++) {fprintf(file, "%5d %12.6f %12.6f %12.6f\n", tom, x[tom][0], x[tom][1], x[tom][2]);}
+			    fclose(file);
+			    printf("coordinates written to file\n");
+			}*/
                     }
                     else
                     {
                         gmx_fatal(FARGS, "Non-empty neighborlist does not have any kernel pointer assigned.");
                     }
                 }
+                if (debug) fprintf(debug, "\n");
             }
         }
     }
+    if (debug) fprintf(debug, "END DO_NONBONDED()\n");
 }
 
 static void
diff -rupN gromacs-5.0/src/gromacs/gmxlib/txtdump.c gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/txtdump.c
--- gromacs-5.0/src/gromacs/gmxlib/txtdump.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxlib/txtdump.c	2014-09-02 22:23:43.000000000 +0200
@@ -999,6 +999,16 @@ void pr_inputrec(FILE *fp, int indent, c
         PI("QMconstraints", ir->QMconstraints);
         PI("QMMMscheme", ir->QMMMscheme);
         PR("MMChargeScaleFactor", ir->scalefactor);
+        PI("QMdftb-sccmode", ir->QMdftbsccmode); /* DFTB labels below use the mdp key spelling */
+        PR("QMdftb-telec", ir->QMdftbtelec);
+        PS("QMdftb-slko-path", ir->QMdftbslkopath);
+        PS("QMdftb-slko-separator", ir->QMdftbslkoseparator);
+        PS("QMdftb-slko-suffix", ir->QMdftbslkosuffix);
+        PS("QMdftb-slko-lowercase", EBOOL(ir->QMdftbslkolowercase));
+        PI("QMdftb-partial-pme", ir->QMdftbpartialpme);
+        PI("QMdftb-dispersion", ir->QMdftbdispersion);
+        PI("QMdftb-cdko", ir->QMdftbcdko);
+        PI("QMdftb-mmhub-inf", ir->QMdftbmmhubinf);
         pr_qm_opts(fp, indent, "qm-opts", &(ir->opts));
 
         /* CONSTRAINT OPTIONS */
diff -rupN gromacs-5.0/src/gromacs/gmxpreprocess/readir.c gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxpreprocess/readir.c
--- gromacs-5.0/src/gromacs/gmxpreprocess/readir.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxpreprocess/readir.c	2014-09-02 22:24:10.000000000 +0200
@@ -94,6 +94,10 @@ typedef struct t_inputrec_strings
     char efield_x[STRLEN], efield_xt[STRLEN], efield_y[STRLEN],
          efield_yt[STRLEN], efield_z[STRLEN], efield_zt[STRLEN];
 
+    char            QMdftbslkopath[STRLEN];
+    char            QMdftbslkoseparator[STRLEN];
+    gmx_bool        QMdftbslkolowercase;
+    char            QMdftbslkosuffix[STRLEN];
 } gmx_inputrec_strings;
 
 static gmx_inputrec_strings *is = NULL;
@@ -2006,6 +2010,16 @@ void get_ir(const char *mdparin, const c
     STYPE ("SAsteps", is->SAsteps, NULL);
     CTYPE ("Scale factor for MM charges");
     RTYPE ("MMChargeScaleFactor", ir->scalefactor, 1.0);
+    ITYPE ("QMdftb-sccmode", ir->QMdftbsccmode, 3);
+    RTYPE ("QMdftb-telec", ir->QMdftbtelec, 1.);
+    STYPE ("QMdftb-slko-path", is->QMdftbslkopath, NULL);
+    STYPE ("QMdftb-slko-separator", is->QMdftbslkoseparator, NULL);
+    EETYPE("QMdftb-slko-lowercase", ir->QMdftbslkolowercase, yesno_names);
+    STYPE ("QMdftb-slko-suffix", is->QMdftbslkosuffix, NULL);
+    ITYPE ("QMdftb-partial-pme", ir->QMdftbpartialpme, 0);
+    ITYPE ("QMdftb-dispersion", ir->QMdftbdispersion, 0);
+    ITYPE ("QMdftb-cdko", ir->QMdftbcdko, 0);
+    ITYPE ("QMdftb-mmhub-inf", ir->QMdftbmmhubinf, 1);
     CTYPE ("Optimization of QM subsystem");
     STYPE ("bOPT",          is->bOPT, NULL);
     STYPE ("bTS",          is->bTS, NULL);
@@ -3660,6 +3674,22 @@ void do_index(const char* mdparin, const
         ir->opts.SAoff[i]   = strtod(ptr2[i], NULL);
         ir->opts.SAsteps[i] = strtol(ptr3[i], NULL, 10);
     }
+
+    /* QM/MM - DFTB text options.
+     * The mdp values are read into STRLEN-sized buffers, while the inputrec
+     * fields are fixed, much shorter arrays; refuse values that do not fit
+     * instead of letting strcpy() write past the end of the destination.
+     */
+    if (strlen(is->QMdftbslkopath) >= sizeof(ir->QMdftbslkopath) ||
+        strlen(is->QMdftbslkoseparator) >= sizeof(ir->QMdftbslkoseparator) ||
+        strlen(is->QMdftbslkosuffix) >= sizeof(ir->QMdftbslkosuffix))
+    {
+        gmx_fatal(FARGS, "QMdftb-slko-path, QMdftb-slko-separator or QMdftb-slko-suffix is too long for the input record");
+    }
+    strcpy(ir->QMdftbslkopath, is->QMdftbslkopath);
+    strcpy(ir->QMdftbslkoseparator, is->QMdftbslkoseparator);
+    strcpy(ir->QMdftbslkosuffix, is->QMdftbslkosuffix);
+
     /* end of QMMM input */
 
     if (bVerbose)
diff -rupN gromacs-5.0/src/gromacs/gmxpreprocess/topio.c gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxpreprocess/topio.c
--- gromacs-5.0/src/gromacs/gmxpreprocess/topio.c	2014-06-24 17:05:01.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/gmxpreprocess/topio.c	2014-08-18 11:24:45.000000000 +0200
@@ -1243,6 +1243,13 @@ static void generate_qmexcl_moltype(gmx_
                              (bQMMM[a1] && bQMMM[a3] && bQMMM[a4]) ||
                              (bQMMM[a2] && bQMMM[a3] && bQMMM[a4]));
                     break;
+                case 5:
+                    /* Tomas Kubar for QM/MM DFTB:
+                     * These are CHARMM CMAP dihedrals
+                     * Let us assume that they are all-MM,
+                     * and nothing has to be excluded.
+                     */
+                    break;
                 default:
                     gmx_fatal(FARGS, "no such bonded interactions with %d atoms\n", nratoms);
             }
diff -rupN gromacs-5.0/src/gromacs/legacyheaders/force.h gromacs-5.0-dftb-v6-plumed/src/gromacs/legacyheaders/force.h
--- gromacs-5.0/src/gromacs/legacyheaders/force.h	2014-06-17 17:14:20.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/legacyheaders/force.h	2014-09-08 15:33:14.000000000 +0200
@@ -157,6 +157,7 @@ void init_interaction_const_tables(FILE
                                    interaction_const_t *ic,
                                    gmx_bool             bSimpleTable,
                                    real                 rtab);
+                              //   matrix               box);
 /* Initializes the tables in the interaction constant data structure.
  * Setting verlet_kernel_type to -1 always initializes tables for
  * use with group kernels.
@@ -253,6 +254,16 @@ void ns(FILE              *fplog,
         gmx_bool           bDoLongRangeNS);
 /* Call the neighborsearcher */
 
+void ns_qmmm(
+        t_forcerec        *fr,
+        matrix             box,
+        gmx_groups_t      *groups,
+        gmx_localtop_t    *top,
+        t_mdatoms         *md,
+        t_commrec         *cr,
+        t_nrnb            *nrnb,
+        gmx_bool           bFillGrid);
+
 extern void do_force_lowlevel(FILE         *fplog,
                               gmx_int64_t   step,
                               t_forcerec   *fr,
diff -rupN gromacs-5.0/src/gromacs/legacyheaders/ns.h gromacs-5.0-dftb-v6-plumed/src/gromacs/legacyheaders/ns.h
--- gromacs-5.0/src/gromacs/legacyheaders/ns.h	2014-06-17 17:14:20.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/legacyheaders/ns.h	2014-08-18 11:28:14.000000000 +0200
@@ -89,6 +89,13 @@ int search_neighbours(FILE *log, t_force
                       gmx_bool bFillGrid,
                       gmx_bool bDoLongRangeNS);
 
+int search_neighbours_qmmm(t_forcerec *fr, matrix box,
+                      gmx_localtop_t *top,
+                      gmx_groups_t *groups,
+                      t_commrec *cr,
+                      t_nrnb *nrnb, t_mdatoms *md,
+                      gmx_bool bFillGrid);
+
 
 /* Debugging routines from wnblist.c */
 void dump_nblist(FILE *out, t_commrec *cr, t_forcerec *fr, int nDNL);
diff -rupN gromacs-5.0/src/gromacs/legacyheaders/pme.h gromacs-5.0-dftb-v6-plumed/src/gromacs/legacyheaders/pme.h
--- gromacs-5.0/src/gromacs/legacyheaders/pme.h	2014-06-17 17:14:20.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/legacyheaders/pme.h	2014-09-08 01:49:54.000000000 +0200
@@ -63,6 +63,14 @@ int gmx_pme_init(gmx_pme_t *pmedata, t_c
  * Return value 0 indicates all well, non zero is an error code.
  */
 
+int gmx_pme_init_dftb(gmx_pme_t *pmedata, t_commrec *cr,
+                 int nnodes_major, int nnodes_minor,
+                 int homenr,
+                 gmx_bool bReproducible, int nthread,
+                 gmx_pme_t pmedata_orig);
+/* QM/MM DFTB variant
+ */
+
 int gmx_pme_reinit(gmx_pme_t *         pmedata,
                    t_commrec *         cr,
                    gmx_pme_t           pme_src,
@@ -110,6 +118,32 @@ int gmx_pme_do(gmx_pme_t pme,
  * Return value 0 indicates all well, non zero is an error code.
  */
 
+int gmx_pme_do_dftb(gmx_pme_t pme,
+               int start,       int homenr,
+               rvec x[],        rvec f[],
+               real chargeA[],
+               matrix box,      t_commrec *cr,
+               int  maxshift_x, int maxshift_y,
+               t_nrnb *nrnb,    /* gmx_wallcycle_t wcycle, */
+               matrix vir_q,    real ewaldcoeff_q,
+               real *energy_q,
+               int flags,       double *pot);
+/* QM/MM DFTB variant
+ */
+
+int gmx_pme_do_dftb_mm_forces(gmx_pme_t pme,
+               int start,       int homenr,
+               rvec x[],        rvec f[],
+               real chargeA[],
+               matrix box,      t_commrec *cr,
+               int  maxshift_x, int maxshift_y,
+               t_nrnb *nrnb,    /* gmx_wallcycle_t wcycle, */
+               matrix vir_q,    real ewaldcoeff_q,
+               real *energy_q,
+               int flags);
+/* QM/MM DFTB variant - calculate MM forces only
+ */
+
 int gmx_pmeonly(gmx_pme_t pme,
                 t_commrec *cr,     t_nrnb *mynrnb,
                 gmx_wallcycle_t wcycle,
diff -rupN gromacs-5.0/src/gromacs/legacyheaders/qmmm.h gromacs-5.0-dftb-v6-plumed/src/gromacs/legacyheaders/qmmm.h
--- gromacs-5.0/src/gromacs/legacyheaders/qmmm.h	2014-06-17 17:14:20.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/legacyheaders/qmmm.h	2014-08-18 11:34:39.000000000 +0200
@@ -79,7 +79,8 @@ void update_QMMMrec(t_commrec      *cr,
 
 real calculate_QMMM(t_commrec *cr,
                     rvec x[], rvec f[],
-                    t_forcerec *fr);
+                    t_forcerec *fr,
+                    matrix box);
 
 /* QMMM computes the QM forces. This routine makes either function
  * calls to gmx QM routines (derived from MOPAC7 (semi-emp.) and MPQC
diff -rupN gromacs-5.0/src/gromacs/legacyheaders/types/inputrec.h gromacs-5.0-dftb-v6-plumed/src/gromacs/legacyheaders/types/inputrec.h
--- gromacs-5.0/src/gromacs/legacyheaders/types/inputrec.h	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/legacyheaders/types/inputrec.h	2014-09-02 22:21:51.000000000 +0200
@@ -467,6 +467,16 @@ typedef struct {
     int             QMconstraints; /* constraints on QM bonds                      */
     int             QMMMscheme;    /* Scheme: ONIOM or normal                      */
     real            scalefactor;   /* factor for scaling the MM charges in QM calc.*/
+    int             QMdftbsccmode; /* SCC-DFTB mode: 2 (1998 SCC-DFTB) or 3 (2011 DFTB3) */
+    real            QMdftbtelec;   /* SCC-DFTB electronic temperature in K */
+    int             QMdftbpartialpme; /* PME performed with QM atoms only in the SCC-DFTB calculation after first SCC iteration */
+    int             QMdftbdispersion; /* SCC-DFTB empirical dispersion: 0==no, 1==D3, 2==Elstner2001 */
+    int             QMdftbcdko;    /* SCC-DFTB charge-dependent Klopman--Ohno (CDKO) QM/MM: 1 or 0 */
+    int             QMdftbmmhubinf;/* SCC-DFTB CDKO - Hubbard parameter U of the MM atoms is infinite: 1 or 0 */
+    char            QMdftbslkopath[168];      /* SCC-DFTB path to the parameter files (fixed-size buffer) */
+    char            QMdftbslkoseparator[20];  /* ... separator between element symbols (fixed-size buffer) */
+    gmx_bool        QMdftbslkolowercase;      /* ... lowercase element symbols? ("h" instead of "H" for hydrogen) */
+    char            QMdftbslkosuffix[20];     /* ... suffix of parameter file names (fixed-size buffer) */
                                    /* parameter needed for AdResS simulation       */
     gmx_bool        bAdress;       /* Is AdResS enabled ? */
     t_adress       *adress;        /* The data for adress simulations */
diff -rupN gromacs-5.0/src/gromacs/legacyheaders/types/qmmmrec.h gromacs-5.0-dftb-v6-plumed/src/gromacs/legacyheaders/types/qmmmrec.h
--- gromacs-5.0/src/gromacs/legacyheaders/types/qmmmrec.h	2014-06-17 17:14:20.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/legacyheaders/types/qmmmrec.h	2014-10-15 00:05:06.000000000 +0200
@@ -38,11 +38,198 @@
 
 #include "simple.h"
 
+/* needed for DFTB
+ */
+#define DFTB_MAXTYPES (8)
+  /* max. number of chemical elements in DFTB */
+  /* POSSIBLY MAKE DYNAMIC! */
+#define LDIM (9)
+  /* size of prophylactic arrays - 1 s, 3 p and 5 d orbital components = 9 */
+#define MAXITER_BROYDEN (80)
+#define IMATSZ_BROYDEN (80)
+
+#define MAX_PME_NEIGHBORS (1666)
+
+#define DFTB_D3_MAXELEM (94)
+#define DFTB_D3_MAXC (5)
+
+typedef int twointegers[2];
+typedef double tendoubles[10];
+typedef double twodoubles[2];
+typedef double twodoubles_array[2][MAXITER_BROYDEN];
+typedef double sixdoubles[6];
+typedef int pme_integers[MAX_PME_NEIGHBORS];
+/* end DFTB
+ */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+typedef struct { /* work arrays, presumably for Broyden mixing (inferred from the names) - confirm */
+  /* The pointer members marked [MAXSIZ] are not fixed-size: MAXSIZ is replaced by the (much smaller) actual number of atoms in each nucleobase */
+  double *f, //[MAXSIZ],
+         *ui, //[MAXSIZ],
+         *vti, //[MAXSIZ],
+         *t1, //[MAXSIZ],
+         *dumvi, //[MAXSIZ],
+         *df, //[MAXSIZ],
+         a[IMATSZ_BROYDEN][IMATSZ_BROYDEN],
+         b[IMATSZ_BROYDEN][IMATSZ_BROYDEN],
+         b_lapack[IMATSZ_BROYDEN * IMATSZ_BROYDEN], /* flat array with as many elements as b; presumably passed to LAPACK - confirm */
+         cm[IMATSZ_BROYDEN],
+         w[IMATSZ_BROYDEN],
+         work[IMATSZ_BROYDEN * IMATSZ_BROYDEN];
+  long ipiv[IMATSZ_BROYDEN]; /* presumably LAPACK pivot indices - confirm against the caller */
+  twodoubles *vector,
+             *unit31;
+  twodoubles_array *unit32;
+} dftb_broyden_t;
+
+typedef struct { /* parameter tables and scratch arrays for the D3 dispersion correction */
+  /* cut-off radii for all element pairs */
+  double r0ab[DFTB_D3_MAXELEM][DFTB_D3_MAXELEM];
+  /* C6 for all element pairs */
+  double c6ab[DFTB_D3_MAXELEM][DFTB_D3_MAXELEM][DFTB_D3_MAXC][DFTB_D3_MAXC][3];
+  /* how many different C6 for one element */
+  int mxc[DFTB_D3_MAXELEM];
+  /* covalent radii */
+  double rcov[DFTB_D3_MAXELEM];
+  /* atomic <r^2>/<r^4> values */
+  double r2r4[DFTB_D3_MAXELEM];
+
+  /* THE PARAMETERS OF THE METHOD (not all are "free") */
+  double rs6, rs8, s6, s18, rs18;
+
+  /* R^2 distance neglect threshold (important for speed in case of large systems) */
+  double rthr, rthr2;
+
+  /* coordination numbers of the atoms */
+  double *cn;
+  /* auxiliary arrays to be used in subroutines; sizes are set by the allocating code, not visible here */
+  double *drij;
+  double *dcn;
+  double *dc6_rest;
+  double **dc6ij;
+  int *skip;
+} dftb_dftd3_t;
+
+typedef struct { /* DFTB working data: coordinates, gradients, external charges, matrices and PME buffers */
+  dvec *x, // NNDIM 3 /* coordinates */
+       *grad, /* gradients */
+       *partgrad, /* temporary array for gradient components */
+       *xe, /* coordinates of external charges, NNDIM 3 */
+       *mmgrad, /* gradients at external charges */
+       *partmmgrad, /* temp. array for components of gradients at external charges */
+       com, /* center of mass */
+       **gamma_deriv; /* derivatives of the gammamat */
+  double *mass, /* masses of the atoms */
+         inv_tot_mass, /* 1 / sum(mass) */
+         *ze, /* magnitudes of external charges, NNDIM */
+         *qmat, // NNDIM
+         *qmold, // NNDIM
+         *qmulli, // MDIM !!!
+         au[LDIM][LDIM],
+         bu[LDIM][LDIM],
+         auh[LDIM][LDIM],
+         buh[LDIM][LDIM],
+         **a, // MDIM MDIM
+         **b, // MDIM MDIM
+         *a_trans, //ndim^2
+         *b_trans, //ndim^2
+         **hamil, // MDIM MDIM
+         **overl, // MDIM MDIM
+         **gammamat, // nn nn
+         **gammader, // nn nn
+         *shift, // NNDIM
+         *shift3, // NNDIM
+         *shift3a, // NNDIM
+         *shiftE, // NNDIM
+         *shiftE2, // NNDIM
+         *ev, // MDIM
+         *occ, // MDIM
+         *aux, // 3*MDIM or 1 + 6*NNDIM + 2*NNDIM^2
+         telec, /* electronic temperature for fermi() */
+	 *pot2, *pot3, *pot4, *pot5, *pot6, *pot7, // components of PME electrostatic potential
+	 *pot; // electrostatic potential from PME ("external shift" in DFTB)
+  int nn, /* number of atoms */
+      *ind, // nn+1
+      ndim, // = ind[nn]
+      ne, /* number of external charges */
+      *izp, /* list of atom types */
+      *izpxh, /* does the atom need a special treatment of gamma? (1 for hydrogen, 0 for other elements) */
+      nel, /* number of electrons */
+      norb, /* number of orbitals == ndim!*/
+      *neighbors_pme; /* for PME - number of neighbors for each atom */
+  pme_integers *neighbor_pme; /* for PME - neighbor lists for each atom; each list holds up to MAX_PME_NEIGHBORS entries */
+  long *iaux; // 3 + 5*NNDIM
+  real *q_pme;
+  rvec *x_pme, *f_pme;
+  t_nrnb *nrnb_pme;
+} dftb_phase1_t;
+
+typedef struct { /* top-level DFTB record: QM/MM atom lists, options, SLKO parameters and the phase1 working data */
+  /* copied from charge_transfer_t */
+  int atoms;           /* number of QM atoms */
+  int *atom;           /* list of QM atoms */
+  int *atomtype;       /* similarly as in the previous case */
+                       /* C=0, H=1, N=2, O=3 -- or any other combination; usually, each type is an element */
+  int elements;        /* number of chemical elements in the QM system */
+  int element[DFTB_MAXTYPES]; /* definition of used chemical elements */
+                       /* element[0]=6 means that the first element is carbon (atomic number 6), etc. */
+  int extcharges;      /* number of extcharges - MM atoms, corrected for link atoms */
+  int *extcharge;      /* list of extcharges */
+  int *modif_extcharge;/* which extcharges correspond to those of O4a and C2q? will be modified, +0.06080 - UNNECESSARY? */
+  int sccmode;         /* 2 for 1998 SCC-DFTB, 3 for 2011 DFTB3 */
+
+  char            slkopath[168];      /* path to the parameter files */
+  char            slkoseparator[20];  /* ... separator between element symbols */
+  gmx_bool        slkolowercase;      /* ... lowercase element symbols? ("h" instead of "H" for hydrogen) */
+  char            slkosuffix[20];     /* ... suffix of parameter file names */
+
+  /* newer options */
+  int cdko;            /* whether or not to do charge-dependent Klopman--Ohno QM/MM (1 or 0) */
+  int mmhub_inf;       /* whether or not Hubbard of MM atoms shall be infinite (1 or 0) */
+  int *mm_element;     /* what chemical elements are MM atoms? one entry per atom,
+                          to be used the same way as int *atomtype for QM atoms above */
+  int partial_pme;     /* whether or not the PME calculation of external potential
+                          shall be done for images of QM atoms only, starting from 2nd SCC iteration */
+  int cutoff_qmmm;     /* whether a switched cut-off QM/MM calculation shall be done instead of PME */
+  int surf_corr_pme;   /* whether the surface correction shall be considered or not (if not = tin-foil boundary cond.) */
+  int dispersion;      /* whether or not (or perhaps which kind of)
+                          empirical dispersion shall be calculated (0==NO, 1==DFT-D3, 2==Elstner2001) */
+  dftb_dftd3_t *dftd3; /* structure with data necessary for Grimme's D3 dispersion */
+
+  /* output */
+  int output_qm_freq;  /* how often (if ever) the QM coordinates shall be written */
+  int output_mm_freq;  /* how often (if ever) the MM coordinates shall be written */
+
+  /* original content of dftb_t */
+  int lmax[DFTB_MAXTYPES];       /* number of shells for each atom type */
+  double racc, dacc;             /* machine accuracy */
+  tendoubles *skhtab1[DFTB_MAXTYPES][DFTB_MAXTYPES], *skstab1[DFTB_MAXTYPES][DFTB_MAXTYPES];
+  double skself1[DFTB_MAXTYPES][3], dr1[DFTB_MAXTYPES][DFTB_MAXTYPES],
+    qzero1[DFTB_MAXTYPES], uhubb1[DFTB_MAXTYPES], uhder1[DFTB_MAXTYPES], zeta1; /* SLKO parameters for DFTB phase 1 */
+  int dim1[DFTB_MAXTYPES][DFTB_MAXTYPES];
+  dftb_phase1_t phase1;
+  dftb_broyden_t *broyden;
+  /* repulsive */
+  dvec efkt[DFTB_MAXTYPES][DFTB_MAXTYPES];
+  int numint[DFTB_MAXTYPES][DFTB_MAXTYPES];
+  double cutoff[DFTB_MAXTYPES][DFTB_MAXTYPES];
+  sixdoubles *coeff[DFTB_MAXTYPES][DFTB_MAXTYPES];
+  twodoubles *xr[DFTB_MAXTYPES][DFTB_MAXTYPES];
+  /* charge-dependent Klopman--Ohno */
+  double alpha1[DFTB_MAXTYPES], beta1[DFTB_MAXTYPES];
+  /* for PME */
+  double rcoulomb_pme; // cut-off
+  double rlist_pme; // neighborlist cut-off (for PME, equal to rcoulomb_pme; for switched cut-off, larger)
+  int nstlist_pme, lastlist_pme; // frequency of neighborsearching; last step when neighborsearching was done
+
+  /* COPY PHASE1 BELOW! */
+} dftb_t;
+
+typedef struct {
     int                nrQMatoms;      /* total nr of QM atoms              */
     rvec              *xQM;            /* shifted to center of box          */
     int               *indexQM;        /* atom i = atom indexQM[i] in mdrun */
@@ -83,14 +270,31 @@ typedef struct {
     ivec               SHbasis;
     int                CASelectrons;
     int                CASorbitals;
+    /* DFTB-specific members of the QM record */
+    char               dftbslkopath[168];      /* SCC-DFTB path to the parameter files */
+    char               dftbslkoseparator[20];  /* ... separator between element symbols */
+    gmx_bool           dftbslkolowercase;      /* ... lowercase element symbols? ("h" instead of "H" for hydrogen) */
+    char               dftbslkosuffix[20];     /* ... suffix of parameter file names */
+    int                dftbsccmode;            /* NOTE(review): meaning of the values is not documented here */
+    double             dftbtelec;              /* presumably the electronic temperature - TODO confirm, incl. units */
+    int                dftbpartialpme;         /* NOTE(review): PME-related switch, semantics not documented here */
+    int                dftbdispersion;         /* presumably same encoding as dftb_t.dispersion - TODO confirm */
+    int                dftbcdko;               /* presumably the charge-dependent Klopman--Ohno switch - TODO confirm */
+    int                dftbmmhubinf;           /* NOTE(review): semantics not documented here */
+    dftb_t            *dftb;                   /* DFTB working data (dftb_t) of this QM record */
 } t_QMrec;
 
 typedef struct {
     int            nrMMatoms;   /* nr of MM atoms, updated every step*/
+//  int            nrMMatoms_new;
     rvec          *xMM;         /* shifted to center of box          */
+//  rvec          *xMM_new;
     int           *indexMM;     /* atom i = atom indexMM[I] in mdrun */
+//  int           *indexMM_new;
     real          *MMcharges;   /* MM point charges in std QMMM calc.*/
+//  real          *MMcharges_new;
     int           *shiftMM;
+//  int           *shiftMM_new;
     int           *MMatomtype;  /* only important for semi-emp.      */
     real           scalefactor;
     /* gaussian specific stuff */
diff -rupN gromacs-5.0/src/gromacs/mdlib/force.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/force.c
--- gromacs-5.0/src/gromacs/mdlib/force.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/force.c	2014-10-23 17:19:25.000000000 +0200
@@ -67,6 +67,13 @@
 #include "gromacs/timing/wallcycle.h"
 #include "gmx_fatal.h"
 
+/* PLUMED: global state defined here; other files declare these symbols extern */
+#include "../../../Plumed.h"
+int    plumedswitch=0; /* tested as a boolean before every PLUMED call; off by default */
+plumed plumedmain; /* PLUMED handle, passed as first argument of plumedcmd */
+void(*plumedcmd)(plumed,const char*,const void*)=NULL; /* command entry point; NULL here, must be assigned before plumedswitch is enabled */
+/* END PLUMED */
+
 void ns(FILE              *fp,
         t_forcerec        *fr,
         matrix             box,
@@ -110,6 +117,50 @@ void ns(FILE              *fp,
     }
 }
 
+void ns_qmmm(
+        t_forcerec        *fr,
+        matrix             box,
+        gmx_groups_t      *groups,
+        gmx_localtop_t    *top,
+        t_mdatoms         *md,
+        t_commrec         *cr,
+        t_nrnb            *nrnb,
+        gmx_bool           bFillGrid)
+{
+    char   *ptr; /* unused */
+    int     nsearch;
+    /* Neighbour-search driver that runs search_neighbours_qmmm() only. It has
+     * no FILE* parameter, so NULL is handed to init_neighbor_list() and dump_nblist(). */
+    if (!fr->ns.nblist_initialized)
+    {
+        init_neighbor_list(NULL, fr, md->homenr);
+    }
+
+    if (fr->bTwinRange)
+    {
+        fr->nlr = 0;
+    }
+
+    nsearch = search_neighbours_qmmm(fr, box, top, groups, cr, nrnb, md,
+                                bFillGrid);
+    /* debug output of the search count, disabled:
+    if (debug)
+    {
+        fprintf(debug, "nsearch = %d\n", nsearch);
+    }
+    */
+
+    /* Check whether we have to do dynamic load balancing (disabled) */
+    /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
+       count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
+       &(top->idef),opts->ngener);
+     */
+    if (fr->ns.dump_nl > 0)
+    {
+        dump_nblist(NULL, cr, fr, fr->ns.dump_nl); /* NOTE(review): NULL stream - confirm dump_nblist() tolerates it */
+    }
+}
+
 static void reduce_thread_forces(int n, rvec *f,
                                  tensor vir_q, tensor vir_lj,
                                  real *Vcorr_q, real *Vcorr_lj,
@@ -203,9 +254,13 @@ void do_force_lowlevel(FILE       *fplog
     debug_gmx();
 
     /* do QMMM first if requested */
+#pragma omp single
+    { /* the QM/MM step (and its progress line on stdout) is executed by a single thread */
     if (fr->bQMMM)
     {
-        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr);
+        fprintf(stdout, "\nSTEP %lld\n\n", (long long) step); /* Tomas Kubar; explicit cast keeps the format valid for any integer width of step */
+        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, box); /* box is passed on to the QM/MM code */
+    }
     }
 
     if (bSepDVDL)
@@ -281,9 +336,11 @@ void do_force_lowlevel(FILE       *fplog
         }
 
         wallcycle_sub_start(wcycle, ewcsNONBONDED);
+        /* printf("TEST FSHIFT 22 BEFOR DO_NONB: %f %f %f\n", fr->fshift[22][XX], fr->fshift[22][YY], fr->fshift[22][ZZ]); */
         do_nonbonded(fr, x, f, f_longrange, md, excl,
                      &enerd->grpp, nrnb,
                      lambda, dvdl_nb, -1, -1, donb_flags);
+        /* printf("TEST FSHIFT 22 AFTER DO_NONB: %f %f %f\n", fr->fshift[22][XX], fr->fshift[22][YY], fr->fshift[22][ZZ]); */
 
         /* If we do foreign lambda and we have soft-core interactions
          * we have to recalculate the (non-linear) energies contributions.
@@ -738,6 +795,14 @@ void do_force_lowlevel(FILE       *fplog
         pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
     }
 
+    /* PLUMED: if the energy is not required, run PLUMED's calculation right here */
+    if(plumedswitch){
+      int plumedNeedsEnergy=0; /* defined value even if the query below leaves it untouched */
+      (*plumedcmd)(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
+      if(!plumedNeedsEnergy) (*plumedcmd)(plumedmain,"performCalc",NULL);
+    }
+    /* END PLUMED */
+
 }
 
 void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd)
diff -rupN gromacs-5.0/src/gromacs/mdlib/forcerec.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/forcerec.c
--- gromacs-5.0/src/gromacs/mdlib/forcerec.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/forcerec.c	2014-09-08 16:07:59.000000000 +0200
@@ -81,6 +81,8 @@
 #include "nbnxn_cuda_data_mgmt.h"
 #include "pmalloc_cuda.h"
 
+#include <stdio.h>
+
 t_forcerec *mk_forcerec(void)
 {
     t_forcerec *fr;
@@ -1865,6 +1867,7 @@ gmx_bool uses_simple_tables(int
 static void init_ewald_f_table(interaction_const_t *ic,
                                gmx_bool             bUsesSimpleTables,
                                real                 rtab)
+                           //  matrix               box)
 {
     real maxr;
 
@@ -1875,8 +1878,15 @@ static void init_ewald_f_table(interacti
          */
         ic->tabq_scale = ewald_spline3_table_scale(ic);
 
-        maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
+        maxr           =  (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
+                          /* previously,
+                           * Tomas Kubar had substituted here:
+                         sqrt(box[XX][XX]*box[XX][XX] + box[YY][YY]*box[YY][YY] + box[ZZ][ZZ]*box[ZZ][ZZ]);
+                           * or similar
+                           */
         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
+        printf("INIT_EWALD_F_TABLE: scale=%f, maxr=%f, size=%d\n",
+          ic->tabq_scale, maxr, ic->tabq_size);
     }
     else
     {
@@ -1917,12 +1927,13 @@ void init_interaction_const_tables(FILE
                                    interaction_const_t *ic,
                                    gmx_bool             bUsesSimpleTables,
                                    real                 rtab)
+                              //   matrix               box)
 {
     real spacing;
 
     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
     {
-        init_ewald_f_table(ic, bUsesSimpleTables, rtab);
+        init_ewald_f_table(ic, bUsesSimpleTables, rtab); //, box); /* Tomas Kubar needed that previously */
 
         if (fp != NULL)
         {
@@ -1978,6 +1989,7 @@ init_interaction_const(FILE
                        interaction_const_t       **interaction_const,
                        const t_forcerec           *fr,
                        real                        rtab)
+                  //   matrix                      box)
 {
     interaction_const_t *ic;
     gmx_bool             bUsesSimpleTables = TRUE;
@@ -2128,7 +2140,7 @@ init_interaction_const(FILE
     }
 
     bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
-    init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
+    init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab); // , box);
 }
 
 static void init_nb_verlet(FILE                *fp,
@@ -3249,7 +3261,7 @@ void init_forcerec(FILE              *fp
     }
 
     /* fr->ic is used both by verlet and group kernels (to some extent) now */
-    init_interaction_const(fp, cr, &fr->ic, fr, rtab);
+    init_interaction_const(fp, cr, &fr->ic, fr, rtab); //, box);
 
     if (ir->eDispCorr != edispcNO)
     {
diff -rupN gromacs-5.0/src/gromacs/mdlib/minimize.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/minimize.c
--- gromacs-5.0/src/gromacs/mdlib/minimize.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/minimize.c	2014-10-23 17:19:03.000000000 +0200
@@ -80,6 +80,13 @@
 #include "gromacs/timing/walltime_accounting.h"
 #include "gromacs/imd/imd.h"
 
+/* PLUMED: shared state, declared extern here (not defined in this file) */
+#include "../../../Plumed.h"
+extern int    plumedswitch; /* tested as a boolean before every PLUMED call below */
+extern plumed plumedmain; /* PLUMED handle, first argument of every plumedcmd call */
+extern void(*plumedcmd)(plumed,const char*,const void*); /* command entry point: (handle, command string, argument pointer) */
+/* END PLUMED */
+
 typedef struct {
     t_state  s;
     rvec    *f;
@@ -441,6 +448,43 @@ void init_em(FILE *fplog, const char *ti
 
     clear_rvec(mu_tot);
     calc_shifts(ems->s.box, fr->shift_vec);
+
+    /* PLUMED: hand communicators, atom count, engine name, log and time step to PLUMED, then initialise it */
+    if(plumedswitch){
+      if(cr->ms && cr->ms->nsim>1) { /* multi-simulation run: set up the "GREX" communicators first */
+        if(MASTER(cr)) (*plumedcmd) (plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters);
+        if(PAR(cr)){
+          if(DOMAINDECOMP(cr)) {
+            (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all);
+          }else{
+            (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim);
+          }
+        }
+        (*plumedcmd) (plumedmain,"GREX init",NULL);
+      }
+      if(PAR(cr)){ /* parallel run: communicator depends on whether domain decomposition is used */
+        if(DOMAINDECOMP(cr)) {
+          (*plumedcmd) (plumedmain,"setMPIComm",&cr->dd->mpi_comm_all);
+        }else{
+          (*plumedcmd) (plumedmain,"setMPIComm",&cr->mpi_comm_mysim);
+        }
+      }
+      (*plumedcmd) (plumedmain,"setNatoms",&top_global->natoms);
+      (*plumedcmd) (plumedmain,"setMDEngine","gromacs");
+      (*plumedcmd) (plumedmain,"setLog",fplog);
+      real real_delta_t; /* copy of the time step as a real, because a pointer to it is passed below */
+      real_delta_t=ir->delta_t;
+      (*plumedcmd) (plumedmain,"setTimestep",&real_delta_t);
+      (*plumedcmd) (plumedmain,"init",NULL);
+      /* with domain decomposition, publish the home-atom count and the global atom index array */
+      if(PAR(cr)){
+        if(DOMAINDECOMP(cr)) {
+          (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home);
+          (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex);
+        }
+      }
+    }
+    /* END PLUMED */
 }
 
 static void finish_em(t_commrec *cr, gmx_mdoutf_t outf,
@@ -734,12 +778,34 @@ static void evaluate_energy(FILE *fplog,
         em_dd_partition_system(fplog, count, cr, top_global, inputrec,
                                ems, top, mdatoms, fr, vsite, constr,
                                nrnb, wcycle);
+        /* PLUMED: after the repartitioning call above, pass the home-atom count and global index array again */
+        if(plumedswitch){
+          (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home);
+          (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex);
+        }
+        /* END PLUMED */
     }
 
     /* Calc force & energy on new trial position  */
     /* do_force always puts the charge groups in the box and shifts again
      * We do not unshift, so molecules are always whole in congrad.c
      */
+
+    /* PLUMED: register step, coordinates, masses, charges, box, force and virial buffers before the force call */
+    int plumedNeedsEnergy=0; /* filled by "isEnergyNeeded" below; read again after do_force */
+    if(plumedswitch){
+      long int lstep=count; (*plumedcmd)(plumedmain,"setStepLong",&lstep); /* pass the long copy, not count itself */
+      (*plumedcmd) (plumedmain,"setPositions",&ems->s.x[0][0]);
+      (*plumedcmd) (plumedmain,"setMasses",&mdatoms->massT[0]);
+      (*plumedcmd) (plumedmain,"setCharges",&mdatoms->chargeA[0]);
+      (*plumedcmd) (plumedmain,"setBox",&ems->s.box[0][0]);
+      (*plumedcmd) (plumedmain,"prepareCalc",NULL);
+      (*plumedcmd) (plumedmain,"setForces",&ems->f[0][0]);
+      (*plumedcmd) (plumedmain,"setVirial",&force_vir[0][0]);
+      (*plumedcmd) (plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
+    }
+    /* END PLUMED */
+
     do_force(fplog, cr, inputrec,
              count, nrnb, wcycle, top, &top_global->groups,
              ems->s.box, ems->s.x, &ems->s.hist,
@@ -749,6 +815,15 @@ static void evaluate_energy(FILE *fplog,
              GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
              (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
 
+    /* PLUMED: when the energy was requested, pass F_EPOT and run the calculation now, after do_force */
+    if(plumedswitch){
+      if(plumedNeedsEnergy) {
+        (*plumedcmd) (plumedmain,"setEnergy",&enerd->term[F_EPOT]);
+        (*plumedcmd) (plumedmain,"performCalc",NULL);
+      }
+    }
+    /* END PLUMED */
+
     /* Clear the unused shake virial and pressure */
     clear_mat(shake_vir);
     clear_mat(pres);
@@ -991,6 +1066,14 @@ double do_cg(FILE *fplog, t_commrec *cr,
 
     step = 0;
 
+    /* QM/MM - DFTB: output file for the charges written after each energy evaluation */
+    FILE             *f_qm_dftb_charges=NULL; /* stays NULL unless QM/MM is active */
+    int               counter_tom; /* loop index over the entries written per row */
+    if (fr->bQMMM)
+    {
+        f_qm_dftb_charges = fopen("qm_dftb_charges.xvg", "w"); /* NOTE(review): result is not checked here; every process reaching this line opens the same name */
+    }
+
     s_min = init_em_state();
     s_a   = init_em_state();
     s_b   = init_em_state();
@@ -1028,6 +1111,16 @@ double do_cg(FILE *fplog, t_commrec *cr,
                     mu_tot, enerd, vir, pres, -1, TRUE);
     where();
 
+    /* QM/MM - DFTB: write the step and, per entry, qzero1[izp[i]] - qmat[i] as one row */
+    if (f_qm_dftb_charges != NULL) /* non-NULL only when QM/MM is on and the file could be opened */
+    {
+        fprintf(f_qm_dftb_charges, "%10d", step);
+        for (counter_tom=0; counter_tom < fr->qr->qm[0]->dftb->phase1.nn; counter_tom++)
+          fprintf(f_qm_dftb_charges, "%8.4f", -fr->qr->qm[0]->dftb->phase1.qmat[counter_tom]
+                                              + fr->qr->qm[0]->dftb->qzero1[fr->qr->qm[0]->dftb->phase1.izp[counter_tom]]);
+        fprintf(f_qm_dftb_charges, "\n");
+    }
+
     if (MASTER(cr))
     {
         /* Copy stuff to the energy bin for easy printing etc. */
@@ -1204,6 +1297,16 @@ double do_cg(FILE *fplog, t_commrec *cr,
                         vsite, constr, fcd, graph, mdatoms, fr,
                         mu_tot, enerd, vir, pres, -1, FALSE);
 
+        /* QM/MM - DFTB: write the step and, per entry, qzero1[izp[i]] - qmat[i] as one row */
+        if (f_qm_dftb_charges != NULL) /* non-NULL only when QM/MM is on and the file could be opened */
+        {
+            fprintf(f_qm_dftb_charges, "%10d", step);
+            for (counter_tom=0; counter_tom < fr->qr->qm[0]->dftb->phase1.nn; counter_tom++)
+              fprintf(f_qm_dftb_charges, "%8.4f", -fr->qr->qm[0]->dftb->phase1.qmat[counter_tom]
+                                                  + fr->qr->qm[0]->dftb->qzero1[fr->qr->qm[0]->dftb->phase1.izp[counter_tom]]);
+            fprintf(f_qm_dftb_charges, "\n");
+        }
+
         /* Calc derivative along line */
         p   = s_c->s.cg_p;
         sf  = s_c->f;
@@ -1312,6 +1415,16 @@ double do_cg(FILE *fplog, t_commrec *cr,
                                 vsite, constr, fcd, graph, mdatoms, fr,
                                 mu_tot, enerd, vir, pres, -1, FALSE);
 
+                /* QM/MM - DFTB: write the step and, per entry, qzero1[izp[i]] - qmat[i] as one row */
+                if (f_qm_dftb_charges != NULL) /* non-NULL only when QM/MM is on and the file could be opened */
+                {
+                    fprintf(f_qm_dftb_charges, "%10d", step);
+                    for (counter_tom=0; counter_tom < fr->qr->qm[0]->dftb->phase1.nn; counter_tom++)
+                      fprintf(f_qm_dftb_charges, "%8.4f", -fr->qr->qm[0]->dftb->phase1.qmat[counter_tom]
+                                                          + fr->qr->qm[0]->dftb->qzero1[fr->qr->qm[0]->dftb->phase1.izp[counter_tom]]);
+                    fprintf(f_qm_dftb_charges, "\n");
+                }
+
                 /* p does not change within a step, but since the domain decomposition
                  * might change, we have to use cg_p of s_b here.
                  */
@@ -1567,6 +1680,12 @@ double do_cg(FILE *fplog, t_commrec *cr,
     /* To print the actual number of steps we needed somewhere */
     walltime_accounting_set_nsteps_done(walltime_accounting, step);
 
+    /* QM/MM - DFTB: close the charge output file if it was opened */
+    if (f_qm_dftb_charges)
+    {
+        fclose(f_qm_dftb_charges); /* NOTE(review): return value ignored, write errors go unnoticed */
+    }
+
     return 0;
 } /* That's all folks */
 
diff -rupN gromacs-5.0/src/gromacs/mdlib/ns.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/ns.c
--- gromacs-5.0/src/gromacs/mdlib/ns.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/ns.c	2014-09-08 00:39:44.000000000 +0200
@@ -310,13 +310,14 @@ void init_neighbor_list(FILE *log, t_for
                         maxsr, maxlr, GMX_NBKERNEL_VDW_NONE, eintmodNONE, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_FREE_ENERGY, bElecAndVdwSwitchDiffers);
         }
     }
+#ifndef GMX_QMMM_DFTB /* DFTB builds skip the fr->QMMMlist setup below; NOTE(review): confirm which list put_in_list_qmmm fills in that case */
     /* QMMM MM list */
     if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom)
     {
         init_nblist(log, &fr->QMMMlist, NULL,
                     maxsr, maxlr, 0, 0, ielec, ielecmod, GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE, GMX_NBLIST_INTERACTION_STANDARD, bElecAndVdwSwitchDiffers);
     }
-
+#endif /* !GMX_QMMM_DFTB */
     if (log != NULL)
     {
         fprintf(log, "\n");
@@ -338,13 +339,13 @@ static void reset_nblist(t_nblist *nl)
 static void reset_neighbor_lists(t_forcerec *fr, gmx_bool bResetSR, gmx_bool bResetLR)
 {
     int n, i;
-
+#ifndef GMX_QMMM_DFTB /* DFTB builds do not reset fr->QMMMlist here */
     if (fr->bQMMM)
     {
         /* only reset the short-range nblist */
         reset_nblist(&(fr->QMMMlist));
     }
-
+#endif /* !GMX_QMMM_DFTB */
     for (n = 0; n < fr->nnblists; n++)
     {
         for (i = 0; i < eNL_NR; i++)
@@ -2839,6 +2840,8 @@ int search_neighbours(FILE *log, t_force
          * fill a special QMMM neighbourlist that contains all neighbours
          * of the QM atoms. If bQMMM is true, this list will now be made:
          */
+#ifndef GMX_QMMM_DFTB
+        /* we do not need it for DFTB */
         if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom)
         {
             nsearch += nsgrid_core(cr, fr, box, ngid, top,
@@ -2846,6 +2849,7 @@ int search_neighbours(FILE *log, t_force
                                    md, put_in_list_qmmm, ns->bHaveVdW,
                                    bDoLongRangeNS, TRUE);
         }
+#endif /* !GMX_QMMM_DFTB */
     }
     else
     {
@@ -2861,6 +2865,138 @@ int search_neighbours(FILE *log, t_force
 
     inc_nrnb(nrnb, eNR_NS, nsearch);
     /* inc_nrnb(nrnb,eNR_LR,fr->nlr); */
+
+    return nsearch;
+}
+
+int search_neighbours_qmmm(t_forcerec *fr,
+                           matrix box,
+                           gmx_localtop_t *top,
+                           gmx_groups_t *groups,
+                           t_commrec *cr,
+                           t_nrnb *nrnb, t_mdatoms *md,
+                           gmx_bool bFillGrid)
+{
+    t_block  *cgs = &(top->cgs);
+    rvec     box_size, grid_x0, grid_x1; /* box_size is filled below but not read afterwards */
+    int      i, j, m, ngid; /* i, j: unused */
+    real     min_size, grid_dens; /* min_size: unused */
+    int      nsearch;
+    char     *ptr; /* unused */
+    gmx_bool     *i_egp_flags; /* unused */
+    int      cg_start, cg_end, start, end; /* cg_start, cg_end: unused */
+    gmx_ns_t *ns;
+    t_grid   *grid;
+    gmx_domdec_zones_t *dd_zones;
+    /* Grid search that calls nsgrid_core() with put_in_list_qmmm only; returns the count from that call */
+    ns = &fr->ns;
+
+    /* Set some local variables */
+    ngid  = groups->grps[egcENER].nr;
+
+    for (m = 0; (m < DIM); m++)
+    {
+        box_size[m] = box[m][m];
+    }
+
+    if (fr->ePBC != epbcNONE)
+    {
+        if (sqr(fr->rlistlong) >= max_cutoff2(fr->ePBC, box))
+        {
+            gmx_fatal(FARGS, "One of the box vectors has become shorter than twice the cut-off length or box_yy-|box_zy| or box_zz has become smaller than the cut-off.");
+        }
+    }
+
+    if (DOMAINDECOMP(cr))
+    {
+        ns_realloc_natoms(ns, cgs->index[cgs->nr]);
+    }
+    debug_gmx();
+
+    /* Reset the neighbourlists (both flags TRUE) */
+    reset_neighbor_lists(fr, TRUE, TRUE);
+
+    if (bFillGrid)
+    {
+
+        grid = ns->grid;
+        if (DOMAINDECOMP(cr))
+        {
+            dd_zones = domdec_zones(cr->dd);
+        }
+        else
+        {
+            dd_zones = NULL;
+
+            get_nsgrid_boundaries(grid->nboundeddim, box, NULL, NULL, NULL, NULL,
+                                  cgs->nr, fr->cg_cm, grid_x0, grid_x1, &grid_dens);
+
+            grid_first(NULL, grid, NULL, NULL, box, grid_x0, grid_x1,
+                       fr->rlistlong, grid_dens);
+        }
+        debug_gmx();
+
+        start = 0;
+        end   = cgs->nr;
+
+        if (DOMAINDECOMP(cr))
+        {
+            end = cgs->nr;
+            fill_grid(dd_zones, grid, end, -1, end, fr->cg_cm);
+            grid->icg0 = 0;
+            grid->icg1 = dd_zones->izone[dd_zones->nizone-1].cg1;
+        }
+        else
+        {
+            fill_grid(NULL, grid, cgs->nr, fr->cg0, fr->hcg, fr->cg_cm);
+            grid->icg0 = fr->cg0;
+            grid->icg1 = fr->hcg;
+            debug_gmx();
+        }
+
+        calc_elemnr(grid, start, end, cgs->nr);
+        calc_ptrs(grid);
+        grid_last(grid, start, end, cgs->nr);
+
+        if (gmx_debug_at)
+        {
+            check_grid(grid);
+            print_grid(debug, grid);
+        }
+    }
+    else if (fr->n_tpi)
+    {
+        /* Set the grid cell index for the test particle only.
+         * The cell to cg index is not corrected, but that does not matter.
+         */
+        fill_grid(NULL, ns->grid, fr->hcg, fr->hcg-1, fr->hcg, fr->cg_cm);
+    }
+    debug_gmx();
+
+    /* Do the core! */
+    grid    = ns->grid;
+    nsearch = 0; /* nsgrid_core(cr, fr, box, ngid, top,
+                          grid, ns->bexcl, ns->bExcludeAlleg,
+                          md, put_in_list, ns->bHaveVdW,
+                          FALSE, FALSE); */
+
+    /* The search with put_in_list is commented out above, so nsearch
+     * starts from zero and only the QM/MM search below is performed.
+     * It fills the special QMMM neighbourlist that contains all
+     * neighbours of the QM atoms; the charge-charge interaction between
+     * QM and MM atoms is handled in the QMMM core calculation
+     * (see QMMM.c).
+     * NOTE(review): reset_neighbor_lists(fr, ...) was called above;
+     * confirm that this fr is not shared with the classical search.
+     */
+
+    nsearch += nsgrid_core(cr, fr, box, ngid, top,
+                           grid, ns->bexcl, ns->bExcludeAlleg,
+                           md, put_in_list_qmmm, ns->bHaveVdW,
+                           FALSE, TRUE);
+
+    inc_nrnb(nrnb, eNR_NS, nsearch);
+    /* inc_nrnb(nrnb,eNR_LR,fr->nlr); */
 
     return nsearch;
 }
diff -rupN gromacs-5.0/src/gromacs/mdlib/pme.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/pme.c
--- gromacs-5.0/src/gromacs/mdlib/pme.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/pme.c	2014-09-08 01:49:33.000000000 +0200
@@ -2631,6 +2631,131 @@ static void gather_f_bsplines(gmx_pme_t
      */
 }
 
+static void gather_fdivq_bsplines_dftb(gmx_pme_t pme, real *grid,
+                              pme_atomcomm_t *atc,
+                              splinedata_t *spline)
+{
+    /* sum forces per unit of charge (no multiplication by charge!) for the particles in spline->ind[]; atc->f[] is overwritten */
+    int     nn, n, ithx, ithy, ithz, i0, j0, k0;
+    int     index_x, index_xy;
+    int     nx, ny, nz, pnx, pny, pnz;
+    int *   idxptr;
+    real    tx, ty, dx, dy;
+    real    fx, fy, fz, gval;
+    real    fxy1, fz1;
+    real    *thx, *thy, *thz, *dthx, *dthy, *dthz;
+    int     norder;
+    real    rxx, ryx, ryy, rzx, rzy, rzz;
+    int     order;
+    /* NOTE(review): many locals are not referenced by name here; presumably DO_FSPLINE / pme_simd4.h use them - do not rename */
+    pme_spline_work_t *work;
+
+#if defined PME_SIMD4_SPREAD_GATHER && !defined PME_SIMD4_UNALIGNED
+    real           thz_buffer[GMX_SIMD4_WIDTH*3],  *thz_aligned;
+    real           dthz_buffer[GMX_SIMD4_WIDTH*3], *dthz_aligned;
+
+    thz_aligned  = gmx_simd4_align_r(thz_buffer);
+    dthz_aligned = gmx_simd4_align_r(dthz_buffer);
+#endif
+
+    work = pme->spline_work;
+
+    order = pme->pme_order;
+    thx   = spline->theta[XX]; /* these six pointers are set again for every atom inside the loop */
+    thy   = spline->theta[YY];
+    thz   = spline->theta[ZZ];
+    dthx  = spline->dtheta[XX];
+    dthy  = spline->dtheta[YY];
+    dthz  = spline->dtheta[ZZ];
+    nx    = pme->nkx;
+    ny    = pme->nky;
+    nz    = pme->nkz;
+    pnx   = pme->pmegrid_nx;
+    pny   = pme->pmegrid_ny;
+    pnz   = pme->pmegrid_nz;
+
+    rxx   = pme->recipbox[XX][XX];
+    ryx   = pme->recipbox[YY][XX];
+    ryy   = pme->recipbox[YY][YY];
+    rzx   = pme->recipbox[ZZ][XX];
+    rzy   = pme->recipbox[ZZ][YY];
+    rzz   = pme->recipbox[ZZ][ZZ];
+
+    for (nn = 0; nn < spline->n; nn++)
+    {
+        n           = spline->ind[nn];
+        /* start from zero for every atom, whatever its charge */
+        atc->f[n][XX] = 0;
+        atc->f[n][YY] = 0;
+        atc->f[n][ZZ] = 0;
+
+        fx     = 0;
+        fy     = 0;
+        fz     = 0;
+        idxptr = atc->idx[n];
+        norder = nn*order;
+
+        i0   = idxptr[XX];
+        j0   = idxptr[YY];
+        k0   = idxptr[ZZ];
+
+        /* Pointer arithmetic alert, next six statements */
+        thx  = spline->theta[XX] + norder;
+        thy  = spline->theta[YY] + norder;
+        thz  = spline->theta[ZZ] + norder;
+        dthx = spline->dtheta[XX] + norder;
+        dthy = spline->dtheta[YY] + norder;
+        dthz = spline->dtheta[ZZ] + norder;
+
+        switch (order)
+        {
+            case 4:
+#ifdef PME_SIMD4_SPREAD_GATHER
+#ifdef PME_SIMD4_UNALIGNED
+#define PME_GATHER_F_SIMD4_ORDER4
+#else
+#define PME_GATHER_F_SIMD4_ALIGNED
+#define PME_ORDER 4
+#endif
+#include "pme_simd4.h"
+#else
+                DO_FSPLINE(4);
+#endif
+                break;
+            case 5:
+#ifdef PME_SIMD4_SPREAD_GATHER
+#define PME_GATHER_F_SIMD4_ALIGNED
+#define PME_ORDER 5
+#include "pme_simd4.h"
+#else
+                DO_FSPLINE(5);
+#endif
+                break;
+            default:
+                DO_FSPLINE(order);
+                break;
+        }
+
+        /* These contributions have to be taken as negative,
+         * see the lines
+         * atc->f[n][XX] += -coefficient*(fx*nx*rxx) ...
+         * in gather_f_bsplines()
+         */
+        atc->f[n][XX] -= fx*nx*rxx;
+        atc->f[n][YY] -= fx*nx*ryx + fy*ny*ryy;
+        atc->f[n][ZZ] -= fx*nx*rzx + fy*ny*rzy + fz*nz*rzz;
+    }
+    /* Since the energy and not forces are interpolated
+     * the net force might not be exactly zero.
+     * This can be solved by also interpolating F, but
+     * that comes at a cost.
+     * A better hack is to remove the net force every
+     * step, but that must be done at a higher level
+     * since this routine doesn't see all atoms if running
+     * in parallel. Don't know how important it is?  EL 990726
+     */
+}
+
 
 static real gather_energy_bsplines(gmx_pme_t pme, real *grid,
                                    pme_atomcomm_t *atc)
@@ -2694,6 +2819,64 @@ static real gather_energy_bsplines(gmx_p
     return energy;
 }
 
+static void gather_pot_bsplines_dftb(gmx_pme_t pme, real *grid,
+                                   pme_atomcomm_t *atc,
+                                   double *potential)
+{
+    splinedata_t *spline;
+    int     n, ithx, ithy, ithz, i0, j0, k0;
+    int     index_x, index_xy;
+    int *   idxptr;
+    real    pot, tx, ty, gval;
+    real    *thx, *thy, *thz;
+    int     norder;
+    int     order;
+    /* For each of the atc->n atoms: B-spline weighted sum of grid values, stored in potential[n] (no charge factor) */
+    spline = &atc->spline[0]; /* only the first spline data set is used */
+
+    order = pme->pme_order;
+
+    for (n = 0; (n < atc->n); n++)
+    {
+        idxptr = atc->idx[n];
+        norder = n*order; /* assumes theta[] is laid out by atom number n - TODO confirm against the caller */
+
+        i0   = idxptr[XX];
+        j0   = idxptr[YY];
+        k0   = idxptr[ZZ];
+
+        /* Pointer arithmetic alert, next three statements */
+        thx  = spline->theta[XX] + norder;
+        thy  = spline->theta[YY] + norder;
+        thz  = spline->theta[ZZ] + norder;
+
+        pot = 0.;
+        for (ithx = 0; (ithx < order); ithx++)
+        {
+            index_x = (i0+ithx)*pme->pmegrid_ny*pme->pmegrid_nz;
+            tx      = thx[ithx];
+
+            for (ithy = 0; (ithy < order); ithy++)
+            {
+                index_xy = index_x+(j0+ithy)*pme->pmegrid_nz;
+                ty       = thy[ithy];
+
+                for (ithz = 0; (ithz < order); ithz++)
+                {
+                    gval  = grid[index_xy+(k0+ithz)];
+                    // if (n==0) printf("GRID %3d %3d %3d %8d %12.7f\n", ithx, ithy, ithz, index_xy+(k0+ithz), grid[index_xy+(k0+ithz)]);
+                    pot  += tx*ty*thz[ithz]*gval;
+                }
+
+            }
+        }
+
+        potential[n] = (double) pot; /* accumulated in real, stored as double */
+    }
+
+    return;
+}
+
 /* Macro to force loop unrolling by fixing order.
  * This gives a significant performance gain.
  */
@@ -2776,6 +2959,28 @@ void make_bsplines(splinevec theta, spli
     }
 }
 
+void make_bsplines_dftb(splinevec theta, splinevec dtheta, int order,
+                   rvec fractx[], int nr, int ind[])
+{
+    /* construct splines for the nr atoms listed in ind[], using their entries in fractx[] */
+    int  i, ii;
+    real *xptr;
+
+    for (i = 0; i < nr; i++)
+    {
+        /* do it always, not only for atoms carrying charge
+         */
+        ii = ind[i];
+        xptr = fractx[ii];
+        switch (order) /* constant arguments for 4 and 5, presumably so that the macro body can be unrolled */
+        {
+            case 4:  CALC_SPLINE(4);     break; /* CALC_SPLINE is defined elsewhere; presumably reads xptr, i, theta, dtheta by name - do not rename */
+            case 5:  CALC_SPLINE(5);     break;
+            default: CALC_SPLINE(order); break;
+        }
+    }
+}
+
 
 void make_dft_mod(real *mod, real *data, int ndata)
 {
@@ -3706,106 +3911,421 @@ int gmx_pme_init(gmx_pme_t *         pme
     return 0;
 }
 
-static void reuse_pmegrids(const pmegrids_t *old, pmegrids_t *new)
+int gmx_pme_init_dftb(gmx_pme_t *         pmedata,
+                      t_commrec *         cr,
+                      int                 nnodes_major,
+                      int                 nnodes_minor,
+                      int                 homenr,
+                      gmx_bool            bReproducible,
+                      int                 nthread,
+                      gmx_pme_t           pmedata_orig)
 {
-    int d, t;
+    gmx_pme_t pme = NULL;
 
-    for (d = 0; d < DIM; d++)
+    int  use_threads, sum_use_threads, i;
+    ivec ndata;
+    /* Builds a new single-grid PME structure for QM/MM DFTB; grid size, order and FEP/P3M settings are copied from pmedata_orig below. Returns 0. */
+    if (debug)
     {
-        if (new->grid.n[d] > old->grid.n[d])
-        {
-            return;
-        }
+        fprintf(debug, "Creating PME data structures.\n");
     }
+    snew(pme, 1);
 
-    sfree_aligned(new->grid.grid);
-    new->grid.grid = old->grid.grid;
+    pme->sum_qgrid_tmp       = NULL;
+    pme->sum_qgrid_dd_tmp    = NULL;
+    pme->buf_nalloc          = 0;
 
-    if (new->grid_th != NULL && new->nthread == old->nthread)
+    pme->nnodes              = 1;
+    pme->bPPnode             = TRUE;
+
+    pme->nnodes_major        = nnodes_major;
+    pme->nnodes_minor        = nnodes_minor;
+
+#ifdef GMX_MPI
+    if (nnodes_major*nnodes_minor > 1)
     {
-        sfree_aligned(new->grid_all);
-        for (t = 0; t < new->nthread; t++)
+        pme->mpi_comm = cr->mpi_comm_mygroup;
+
+        MPI_Comm_rank(pme->mpi_comm, &pme->nodeid);
+        MPI_Comm_size(pme->mpi_comm, &pme->nnodes);
+        if (pme->nnodes != nnodes_major*nnodes_minor)
         {
-            new->grid_th[t].grid = old->grid_th[t].grid;
+            gmx_incons("PME rank count mismatch");
         }
     }
-}
-
-int gmx_pme_reinit(gmx_pme_t *         pmedata,
-                   t_commrec *         cr,
-                   gmx_pme_t           pme_src,
-                   const t_inputrec *  ir,
-                   ivec                grid_size)
-{
-    t_inputrec irc;
-    int homenr;
-    int ret;
-
-    irc     = *ir;
-    irc.nkx = grid_size[XX];
-    irc.nky = grid_size[YY];
-    irc.nkz = grid_size[ZZ];
+    else
+    {
+        pme->mpi_comm = MPI_COMM_NULL;
+    }
+#endif
 
-    if (pme_src->nnodes == 1)
+    if (pme->nnodes == 1)
     {
-        homenr = pme_src->atc[0].n;
+#ifdef GMX_MPI
+        pme->mpi_comm_d[0] = MPI_COMM_NULL;
+        pme->mpi_comm_d[1] = MPI_COMM_NULL;
+#endif
+        pme->ndecompdim   = 0;
+        pme->nodeid_major = 0;
+        pme->nodeid_minor = 0;
+#ifdef GMX_MPI
+        pme->mpi_comm_d[0] = pme->mpi_comm_d[1] = MPI_COMM_NULL;
+#endif
     }
     else
     {
-        homenr = -1;
-    }
+        if (nnodes_minor == 1)
+        {
+#ifdef GMX_MPI
+            pme->mpi_comm_d[0] = pme->mpi_comm;
+            pme->mpi_comm_d[1] = MPI_COMM_NULL;
+#endif
+            pme->ndecompdim   = 1;
+            pme->nodeid_major = pme->nodeid;
+            pme->nodeid_minor = 0;
 
-    ret = gmx_pme_init(pmedata, cr, pme_src->nnodes_major, pme_src->nnodes_minor,
-                       &irc, homenr, pme_src->bFEP_q, pme_src->bFEP_lj, FALSE, pme_src->nthread);
+        }
+        else if (nnodes_major == 1)
+        {
+#ifdef GMX_MPI
+            pme->mpi_comm_d[0] = MPI_COMM_NULL;
+            pme->mpi_comm_d[1] = pme->mpi_comm;
+#endif
+            pme->ndecompdim   = 1;
+            pme->nodeid_major = 0;
+            pme->nodeid_minor = pme->nodeid;
+        }
+        else
+        {
+            if (pme->nnodes % nnodes_major != 0)
+            {
+                gmx_incons("For 2D PME decomposition, #PME ranks must be divisible by the number of ranks in the major dimension");
+            }
+            pme->ndecompdim = 2;
 
-    if (ret == 0)
-    {
-        /* We can easily reuse the allocated pme grids in pme_src */
-        reuse_pmegrids(&pme_src->pmegrid[PME_GRID_QA], &(*pmedata)->pmegrid[PME_GRID_QA]);
-        /* We would like to reuse the fft grids, but that's harder */
+#ifdef GMX_MPI
+            MPI_Comm_split(pme->mpi_comm, pme->nodeid % nnodes_minor,
+                           pme->nodeid, &pme->mpi_comm_d[0]);  /* My communicator along major dimension */
+            MPI_Comm_split(pme->mpi_comm, pme->nodeid/nnodes_minor,
+                           pme->nodeid, &pme->mpi_comm_d[1]);  /* My communicator along minor dimension */
+
+            MPI_Comm_rank(pme->mpi_comm_d[0], &pme->nodeid_major);
+            MPI_Comm_size(pme->mpi_comm_d[0], &pme->nnodes_major);
+            MPI_Comm_rank(pme->mpi_comm_d[1], &pme->nodeid_minor);
+            MPI_Comm_size(pme->mpi_comm_d[1], &pme->nnodes_minor);
+#endif
+        }
+        pme->bPPnode = (cr->duty & DUTY_PP);
     }
 
-    return ret;
-}
+    pme->nthread = nthread;
 
+    /* Check if any of the PME MPI ranks uses threads */
+    use_threads = (pme->nthread > 1 ? 1 : 0);
+#ifdef GMX_MPI
+    if (pme->nnodes > 1)
+    {
+        MPI_Allreduce(&use_threads, &sum_use_threads, 1, MPI_INT,
+                      MPI_SUM, pme->mpi_comm);
+    }
+    else
+#endif
+    {
+        sum_use_threads = use_threads;
+    }
+    pme->bUseThreads = (sum_use_threads > 0);
 
-static void copy_local_grid(gmx_pme_t pme, pmegrids_t *pmegrids,
-                            int grid_index, int thread, real *fftgrid)
-{
-    ivec local_fft_ndata, local_fft_offset, local_fft_size;
-    int  fft_my, fft_mz;
-    int  nsx, nsy, nsz;
-    ivec nf;
-    int  offx, offy, offz, x, y, z, i0, i0t;
-    int  d;
-    pmegrid_t *pmegrid;
-    real *grid_th;
+    pme->bFEP_q      = pmedata_orig->bFEP_q;
+    pme->bFEP_lj     = pmedata_orig->bFEP_lj;
+    pme->bFEP        = pmedata_orig->bFEP;
+    pme->nkx         = pmedata_orig->nkx;
+    pme->nky         = pmedata_orig->nky;
+    pme->nkz         = pmedata_orig->nkz;
+    pme->bP3M        = pmedata_orig->bP3M;
+    pme->pme_order   = pmedata_orig->pme_order;
 
-    gmx_parallel_3dfft_real_limits(pme->pfft_setup[grid_index],
-                                   local_fft_ndata,
-                                   local_fft_offset,
-                                   local_fft_size);
-    fft_my = local_fft_size[YY];
-    fft_mz = local_fft_size[ZZ];
+    /* Always constant electrostatics coefficients */
+    pme->epsilon_r   = pmedata_orig->epsilon_r;
 
-    pmegrid = &pmegrids->grid_th[thread];
+    /* Always constant LJ coefficients */
+    pme->ljpme_combination_rule = pmedata_orig->ljpme_combination_rule;
 
-    nsx = pmegrid->s[XX];
-    nsy = pmegrid->s[YY];
-    nsz = pmegrid->s[ZZ];
+    /* If we violate restrictions, generate a fatal error here */
+    gmx_pme_check_restrictions(pme->pme_order,
+                               pme->nkx, pme->nky, pme->nkz,
+                               pme->nnodes_major,
+                               pme->nnodes_minor,
+                               pme->bUseThreads,
+                               TRUE,
+                               NULL);
 
-    for (d = 0; d < DIM; d++)
+    if (pme->nnodes > 1)
     {
-        nf[d] = min(pmegrid->n[d] - (pmegrid->order - 1),
-                    local_fft_ndata[d] - pmegrid->offset[d]);
-    }
-
-    offx = pmegrid->offset[XX];
-    offy = pmegrid->offset[YY];
-    offz = pmegrid->offset[ZZ];
+        double imbal;
 
-    /* Directly copy the non-overlapping parts of the local grids.
-     * This also initializes the full grid.
+#ifdef GMX_MPI
+        MPI_Type_contiguous(DIM, mpi_type, &(pme->rvec_mpi));
+        MPI_Type_commit(&(pme->rvec_mpi));
+#endif
+
+        /* Note that the coefficient spreading and force gathering, which usually
+         * takes about the same amount of time as FFT+solve_pme,
+         * is always fully load balanced
+         * (unless the coefficient distribution is inhomogeneous).
+         */
+
+        imbal = pme_load_imbalance(pme);
+        if (imbal >= 1.2 && pme->nodeid_major == 0 && pme->nodeid_minor == 0)
+        {
+            fprintf(stderr,
+                    "\n"
+                    "NOTE: The load imbalance in PME FFT and solve is %d%%.\n"
+                    "      For optimal PME load balancing\n"
+                    "      PME grid_x (%d) and grid_y (%d) should be divisible by #PME_ranks_x (%d)\n"
+                    "      and PME grid_y (%d) and grid_z (%d) should be divisible by #PME_ranks_y (%d)\n"
+                    "\n",
+                    (int)((imbal-1)*100 + 0.5),
+                    pme->nkx, pme->nky, pme->nnodes_major,
+                    pme->nky, pme->nkz, pme->nnodes_minor);
+        }
+    }
+
+    /* For non-divisible grid we need pme_order iso pme_order-1 */
+    /* In sum_qgrid_dd x overlap is copied in place: take padding into account.
+     * y is always copied through a buffer: we don't need padding in z,
+     * but we do need the overlap in x because of the communication order.
+     */
+    init_overlap_comm(&pme->overlap[0], pme->pme_order,
+#ifdef GMX_MPI
+                      pme->mpi_comm_d[0],
+#endif
+                      pme->nnodes_major, pme->nodeid_major,
+                      pme->nkx,
+                      (div_round_up(pme->nky, pme->nnodes_minor)+pme->pme_order)*(pme->nkz+pme->pme_order-1));
+
+    /* Along overlap dim 1 we can send in multiple pulses in sum_fftgrid_dd.
+     * We do this with an offset buffer of equal size, so we need to allocate
+     * extra for the offset. That's what the (+1)*pme->nkz is for.
+     */
+    init_overlap_comm(&pme->overlap[1], pme->pme_order,
+#ifdef GMX_MPI
+                      pme->mpi_comm_d[1],
+#endif
+                      pme->nnodes_minor, pme->nodeid_minor,
+                      pme->nky,
+                      (div_round_up(pme->nkx, pme->nnodes_major)+pme->pme_order+1)*pme->nkz);
+
+    /* Double-check for a limitation of the (current) sum_fftgrid_dd code.
+     * Note that gmx_pme_check_restrictions checked for this already.
+     */
+    if (pme->bUseThreads && pme->overlap[0].noverlap_nodes > 1)
+    {
+        gmx_incons("More than one communication pulse required for grid overlap communication along the major dimension while using threads");
+    }
+
+    snew(pme->bsp_mod[XX], pme->nkx);
+    snew(pme->bsp_mod[YY], pme->nky);
+    snew(pme->bsp_mod[ZZ], pme->nkz);
+
+    /* The required size of the interpolation grid, including overlap.
+     * The allocated size (pmegrid_n?) might be slightly larger.
+     */
+    pme->pmegrid_nx = pme->overlap[0].s2g1[pme->nodeid_major] -
+        pme->overlap[0].s2g0[pme->nodeid_major];
+    pme->pmegrid_ny = pme->overlap[1].s2g1[pme->nodeid_minor] -
+        pme->overlap[1].s2g0[pme->nodeid_minor];
+    pme->pmegrid_nz_base = pme->nkz;
+    pme->pmegrid_nz      = pme->pmegrid_nz_base + pme->pme_order - 1;
+    set_grid_alignment(&pme->pmegrid_nz, pme->pme_order);
+
+    pme->pmegrid_start_ix = pme->overlap[0].s2g0[pme->nodeid_major];
+    pme->pmegrid_start_iy = pme->overlap[1].s2g0[pme->nodeid_minor];
+    pme->pmegrid_start_iz = 0;
+
+    make_gridindex5_to_localindex(pme->nkx,
+                                  pme->pmegrid_start_ix,
+                                  pme->pmegrid_nx - (pme->pme_order-1),
+                                  &pme->nnx, &pme->fshx);
+    make_gridindex5_to_localindex(pme->nky,
+                                  pme->pmegrid_start_iy,
+                                  pme->pmegrid_ny - (pme->pme_order-1),
+                                  &pme->nny, &pme->fshy);
+    make_gridindex5_to_localindex(pme->nkz,
+                                  pme->pmegrid_start_iz,
+                                  pme->pmegrid_nz_base,
+                                  &pme->nnz, &pme->fshz);
+
+    pme->spline_work = make_pme_spline_work(pme->pme_order);
+    /* Global FFT grid dimensions, passed to gmx_parallel_3dfft_init below */
+    ndata[0]    = pme->nkx;
+    ndata[1]    = pme->nky;
+    ndata[2]    = pme->nkz;
+
+    /* we only allocate one grid for QM/MM - DFTB
+     */
+    pme->ngrids = 1;
+    snew(pme->fftgrid, pme->ngrids);
+    snew(pme->cfftgrid, pme->ngrids);
+    snew(pme->pfft_setup, pme->ngrids);
+    pmegrids_init(&pme->pmegrid[0],
+                  pme->pmegrid_nx, pme->pmegrid_ny, pme->pmegrid_nz,
+                  pme->pmegrid_nz_base,
+                  pme->pme_order,
+                  pme->bUseThreads,
+                  pme->nthread,
+                  pme->overlap[0].s2g1[pme->nodeid_major]-pme->overlap[0].s2g0[pme->nodeid_major+1],
+                  pme->overlap[1].s2g1[pme->nodeid_minor]-pme->overlap[1].s2g0[pme->nodeid_minor+1]);
+    /* This routine will allocate the grid data to fit the FFTs */
+    gmx_parallel_3dfft_init(&pme->pfft_setup[0], ndata,
+                            &pme->fftgrid[0], &pme->cfftgrid[0],
+                            pme->mpi_comm_d,
+                            bReproducible, pme->nthread);
+
+    if (!pme->bP3M)
+    {
+        /* Use plain SPME B-spline interpolation */
+        make_bspline_moduli(pme->bsp_mod, pme->nkx, pme->nky, pme->nkz, pme->pme_order);
+    }
+    else
+    {
+        /* Use the P3M grid-optimized influence function */
+        make_p3m_bspline_moduli(pme->bsp_mod, pme->nkx, pme->nky, pme->nkz, pme->pme_order);
+    }
+
+    /* Use atc[0] for spreading */
+    init_atomcomm(pme, &pme->atc[0], nnodes_major > 1 ? 0 : 1, TRUE);
+    if (pme->ndecompdim >= 2)
+    {
+        init_atomcomm(pme, &pme->atc[1], 1, FALSE);
+    }
+    /* On a single rank the atom count is already known (homenr), so size the atc[0] buffers right away */
+    if (pme->nnodes == 1)
+    {
+        pme->atc[0].n = homenr;
+        pme_realloc_atomcomm_things(&pme->atc[0]);
+    }
+
+    pme->lb_buf1       = NULL;
+    pme->lb_buf2       = NULL;
+    pme->lb_buf_nalloc = 0;
+
+    {
+        int thread;
+
+        /* Use fft5d, order after FFT is y major, z, x minor */
+
+        snew(pme->work, pme->nthread);
+        for (thread = 0; thread < pme->nthread; thread++)
+        {
+            realloc_work(&pme->work[thread], pme->nkx);
+        }
+    }
+
+    *pmedata = pme;
+
+    return 0;
+}
+
+static void reuse_pmegrids(const pmegrids_t *old, pmegrids_t *new)
+{
+    int d, t;
+    /* Let new share the grid buffers of old; do nothing if new is larger than old in any dimension */
+    for (d = 0; d < DIM; d++)
+    {
+        if (new->grid.n[d] > old->grid.n[d])
+        {
+            return;
+        }
+    }
+    /* Free the buffer allocated for the new grid and point it at the old grid's buffer instead */
+    sfree_aligned(new->grid.grid);
+    new->grid.grid = old->grid.grid;
+    /* Do the same for the per-thread grids, but only when both sets have the same number of threads */
+    if (new->grid_th != NULL && new->nthread == old->nthread)
+    {
+        sfree_aligned(new->grid_all);
+        for (t = 0; t < new->nthread; t++)
+        {
+            new->grid_th[t].grid = old->grid_th[t].grid;
+        }
+    }
+}
+
+int gmx_pme_reinit(gmx_pme_t *         pmedata,
+                   t_commrec *         cr,
+                   gmx_pme_t           pme_src,
+                   const t_inputrec *  ir,
+                   ivec                grid_size)
+{
+    t_inputrec irc;
+    int homenr;
+    int ret;
+    /* Work on a copy of the input record in which only the PME grid dimensions are replaced by grid_size */
+    irc     = *ir;
+    irc.nkx = grid_size[XX];
+    irc.nky = grid_size[YY];
+    irc.nkz = grid_size[ZZ];
+    /* With a single PME rank take the atom count from pme_src; otherwise pass -1 */
+    if (pme_src->nnodes == 1)
+    {
+        homenr = pme_src->atc[0].n;
+    }
+    else
+    {
+        homenr = -1;
+    }
+    /* Create the new PME structure with the decomposition, FEP flags and thread count of pme_src */
+    ret = gmx_pme_init(pmedata, cr, pme_src->nnodes_major, pme_src->nnodes_minor,
+                       &irc, homenr, pme_src->bFEP_q, pme_src->bFEP_lj, FALSE, pme_src->nthread);
+
+    if (ret == 0)
+    {
+        /* We can easily reuse the allocated pme grids in pme_src */
+        reuse_pmegrids(&pme_src->pmegrid[PME_GRID_QA], &(*pmedata)->pmegrid[PME_GRID_QA]);
+        /* We would like to reuse the fft grids, but that's harder */
+    }
+
+    return ret;
+}
+
+
+static void copy_local_grid(gmx_pme_t pme, pmegrids_t *pmegrids,
+                            int grid_index, int thread, real *fftgrid)
+{
+    ivec local_fft_ndata, local_fft_offset, local_fft_size;
+    int  fft_my, fft_mz;
+    int  nsx, nsy, nsz;
+    ivec nf;
+    int  offx, offy, offz, x, y, z, i0, i0t;
+    int  d;
+    pmegrid_t *pmegrid;
+    real *grid_th;
+
+    gmx_parallel_3dfft_real_limits(pme->pfft_setup[grid_index],
+                                   local_fft_ndata,
+                                   local_fft_offset,
+                                   local_fft_size);
+    fft_my = local_fft_size[YY];
+    fft_mz = local_fft_size[ZZ];
+
+    pmegrid = &pmegrids->grid_th[thread];
+
+    nsx = pmegrid->s[XX];
+    nsy = pmegrid->s[YY];
+    nsz = pmegrid->s[ZZ];
+
+    for (d = 0; d < DIM; d++)
+    {
+        nf[d] = min(pmegrid->n[d] - (pmegrid->order - 1),
+                    local_fft_ndata[d] - pmegrid->offset[d]);
+    }
+
+    offx = pmegrid->offset[XX];
+    offy = pmegrid->offset[YY];
+    offz = pmegrid->offset[ZZ];
+
+    /* Directly copy the non-overlapping parts of the local grids.
+     * This also initializes the full grid.
      */
     grid_th = pmegrid->grid;
     for (x = 0; x < nf[XX]; x++)
@@ -4348,15 +4868,157 @@ static void spread_on_grid(gmx_pme_t pme
 #endif
 }
 
-
-static void dump_grid(FILE *fp,
-                      int sx, int sy, int sz, int nx, int ny, int nz,
-                      int my, int mz, const real *g)
+static void spread_on_grid_dftb(gmx_pme_t pme,
+                                pme_atomcomm_t *atc, pmegrids_t *grids,
+                                real *fftgrid)
 {
-    int x, y, z;
-
-    for (x = 0; x < nx; x++)
-    {
+    const int grid_index = 0; /* this is a parameter in the original routine spread_on_grid() */
+    int nthread, thread;
+#ifdef PME_TIME_THREADS
+    gmx_cycles_t c1, c2, c3, ct1a, ct1b, ct1c;
+    static double cs1     = 0, cs2 = 0, cs3 = 0;
+    static double cs1a[6] = {0, 0, 0, 0, 0, 0};
+    static int cnt        = 0;
+#endif
+    /* Spreads the coefficients of the atoms in atc onto grid 0: grid indices, then B-splines for every atom, then spreading (grids must not be NULL). */
+    nthread = pme->nthread;
+    assert(nthread > 0);
+    assert(grids != NULL); /* this variant always spreads, and every path below dereferences grids */
+#ifdef PME_TIME_THREADS
+    c1 = omp_cyc_start();
+#endif
+#pragma omp parallel for num_threads(nthread) schedule(static)
+    for (thread = 0; thread < nthread; thread++)
+    {
+        int start, end;
+
+        start = atc->n* thread   /nthread;
+        end   = atc->n*(thread+1)/nthread;
+
+        /* Compute fftgrid index for all atoms,
+         * with help of some extra variables.
+         */
+        calc_interpolation_idx(pme, atc, start, grid_index, end, thread);
+    }
+#ifdef PME_TIME_THREADS
+    c1   = omp_cyc_end(c1);
+    cs1 += (double)c1;
+#endif
+
+#ifdef PME_TIME_THREADS
+    c2 = omp_cyc_start();
+#endif
+#pragma omp parallel for num_threads(nthread) schedule(static)
+    for (thread = 0; thread < nthread; thread++)
+    {
+        splinedata_t *spline;
+        pmegrid_t *grid = NULL;
+
+        /* make local bsplines  */
+        if (!pme->bUseThreads)
+        {
+            spline = &atc->spline[0];
+
+            spline->n = atc->n;
+            /* Without threading all atoms go onto the single shared grid */
+            grid = &grids->grid;
+        }
+        else
+        {
+            spline = &atc->spline[thread];
+
+            if (grids->nthread == 1)
+            {
+                /* One thread, we operate on all coefficients */
+                spline->n = atc->n;
+            }
+            else
+            {
+                /* Get the indices our thread should operate on */
+                make_thread_local_ind(atc, thread, spline);
+            }
+
+            grid = &grids->grid_th[thread];
+        }
+
+        make_bsplines_dftb(spline->theta, spline->dtheta, pme->pme_order,
+                           atc->fractx, spline->n, spline->ind);
+
+        /* put local atoms on grid. */
+#ifdef PME_TIME_SPREAD
+        ct1a = omp_cyc_start();
+#endif
+        spread_coefficients_bsplines_thread(grid, atc, spline, pme->spline_work);
+
+        if (pme->bUseThreads)
+        {
+            copy_local_grid(pme, grids, grid_index, thread, fftgrid);
+        }
+#ifdef PME_TIME_SPREAD
+        ct1a          = omp_cyc_end(ct1a);
+        cs1a[thread] += (double)ct1a;
+#endif
+    }
+#ifdef PME_TIME_THREADS
+    c2   = omp_cyc_end(c2);
+    cs2 += (double)c2;
+#endif
+
+    if (pme->bUseThreads)
+    {
+#ifdef PME_TIME_THREADS
+        c3 = omp_cyc_start();
+#endif
+#pragma omp parallel for num_threads(grids->nthread) schedule(static)
+        for (thread = 0; thread < grids->nthread; thread++)
+        {
+            reduce_threadgrid_overlap(pme, grids, thread,
+                                      fftgrid,
+                                      pme->overlap[0].sendbuf,
+                                      pme->overlap[1].sendbuf,
+                                      grid_index);
+        }
+#ifdef PME_TIME_THREADS
+        c3   = omp_cyc_end(c3);
+        cs3 += (double)c3;
+#endif
+
+        if (pme->nnodes > 1)
+        {
+            /* Communicate the overlapping part of the fftgrid.
+             * For this communication call we need to check pme->bUseThreads
+             * to have all ranks communicate here, regardless of pme->nthread.
+             */
+            sum_fftgrid_dd(pme, fftgrid, grid_index);
+        }
+    }
+
+#ifdef PME_TIME_THREADS
+    cnt++;
+    if (cnt % 20 == 0)
+    {
+        printf("idx %.2f spread %.2f red %.2f",
+               cs1*1e-9, cs2*1e-9, cs3*1e-9);
+#ifdef PME_TIME_SPREAD
+        for (thread = 0; thread < nthread; thread++)
+        {
+            printf(" %.2f", cs1a[thread]*1e-9);
+        }
+#endif
+        printf("\n");
+    }
+#endif
+}
+
+
+static void dump_grid(FILE *fp,
+                      int sx, int sy, int sz, int nx, int ny, int nz,
+                      int my, int mz, const real *g)
+{
+    int x, y, z;
+
+    for (x = 0; x < nx; x++)
+    {
         for (y = 0; y < ny; y++)
         {
             for (z = 0; z < nz; z++)
@@ -5344,4 +6006,642 @@ int gmx_pme_do(gmx_pme_t pme,
         }
     }
     return 0;
+}
+
+int gmx_pme_do_dftb(gmx_pme_t pme,
+                    int start,       int homenr,
+                    rvec x[],        rvec f[],
+                    real *chargeA,
+                    matrix box, t_commrec *cr,
+                    int  maxshift_x, int maxshift_y,
+                    t_nrnb *nrnb,    /* gmx_wallcycle_t wcycle, */
+                    matrix vir_q,      real ewaldcoeff_q,
+                    real *energy_q,
+                    int flags,       double *pot)
+{
+    int     d, i, j, k, ntot, npme;
+    int     nx, ny, nz;
+    int     n_d, local_ny;
+    pme_atomcomm_t *atc = NULL;
+    pmegrids_t *pmegrid = NULL;
+    real    *grid       = NULL;
+    real    *ptr;
+    rvec    *x_d, *f_d;
+    real    *coefficient = NULL;
+    real    energy_AB[4];
+    matrix  vir_AB[4];
+    gmx_parallel_3dfft_t pfft_setup;
+    real *  fftgrid;
+    t_complex * cfftgrid;
+    int     thread;
+    /* Single-grid PME pass for QM/MM DFTB; flags select spreading of chargeA, the k-space solve, gathering into pot and/or f, and energy/virial output. Returns 0. */
+    assert(pme->nnodes > 0);
+    assert(pme->nnodes == 1 || pme->ndecompdim > 0);
+
+    if (pme->nnodes > 1)
+    {
+        atc      = &pme->atc[0];
+        atc->npd = homenr;
+        if (atc->npd > atc->pd_nalloc)
+        {
+            atc->pd_nalloc = over_alloc_dd(atc->npd);
+            srenew(atc->pd, atc->pd_nalloc);
+        }
+        for (d = pme->ndecompdim-1; d >= 0; d--)
+        {
+            atc           = &pme->atc[d];
+            atc->maxshift = (atc->dimind == 0 ? maxshift_x : maxshift_y);
+        }
+    }
+    else
+    {
+        atc = &pme->atc[0];
+        /* This could be necessary for TPI */
+        pme->atc[0].n = homenr;
+        if (DOMAINDECOMP(cr))
+        {
+            pme_realloc_atomcomm_things(atc);
+        }
+        atc->x = x;
+        atc->f = f;
+    }
+
+    m_inv_ur0(box, pme->recipbox);
+
+    /*
+     * QM/MM - DFTB: only one grid, therefore grid_index==0
+     * and this is not present explicitly (0 is hardcoded instead)
+     */
+
+    /* Unpack structure */
+    pmegrid    = &pme->pmegrid[0];
+    fftgrid    = pme->fftgrid[0];
+    cfftgrid   = pme->cfftgrid[0];
+    pfft_setup = pme->pfft_setup[0];
+    coefficient= chargeA + start;
+
+    grid = pmegrid->grid.grid;
+
+    if (debug)
+    {
+        fprintf(debug, "PME: number of ranks = %d, rank = %d\n",
+                cr->nnodes, cr->nodeid);
+        fprintf(debug, "Grid = %p\n", (void*)grid);
+        if (grid == NULL)
+        {
+            gmx_fatal(FARGS, "No grid!");
+        }
+    }
+    where();
+
+    if (pme->nnodes == 1)
+    {
+        atc->coefficient = coefficient;
+    }
+    else
+    {
+        /* wallcycle_start(wcycle, ewcPME_REDISTXF); */
+        do_redist_pos_coeffs(pme, cr, start, homenr, TRUE, x, coefficient);
+        where();
+
+        /* wallcycle_stop(wcycle, ewcPME_REDISTXF); */
+    }
+
+    if (debug)
+    {
+        fprintf(debug, "Rank= %6d, pme local particles=%6d\n",
+                cr->nodeid, atc->n);
+    }
+
+    if (flags & GMX_PME_SPREAD)
+    {
+        /* wallcycle_start(wcycle, ewcPME_SPREADGATHER); */
+
+        /* Spread the coefficients on a grid */
+        spread_on_grid_dftb(pme, &pme->atc[0], pmegrid, fftgrid);
+
+        inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
+        inc_nrnb(nrnb, eNR_SPREADBSP,
+                 pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
+
+        if (!pme->bUseThreads)
+        {
+            wrap_periodic_pmegrid(pme, grid);
+
+            /* sum contributions to local grid from other nodes */
+#ifdef GMX_MPI
+            if (pme->nnodes > 1)
+            {
+                gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_FORWARD);
+                where();
+            }
+#endif
+
+            copy_pmegrid_to_fftgrid(pme, grid, fftgrid, 0);
+        }
+
+        /* wallcycle_stop(wcycle, ewcPME_SPREADGATHER); */
+
+        /*
+           dump_local_fftgrid(pme,fftgrid);
+           exit(0);
+         */
+    }
+
+    /* Here we start a large thread parallel region */
+#pragma omp parallel num_threads(pme->nthread) private(thread)
+    {
+        thread = gmx_omp_get_thread_num();
+        if (flags & GMX_PME_SOLVE)
+        {
+            int loop_count;
+
+            /* do 3d-fft */
+            if (thread == 0)
+            {
+                /* wallcycle_start(wcycle, ewcPME_FFT); */
+            }
+            gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX,
+                                       thread, 0); /* wcycle); */
+            if (thread == 0)
+            {
+                /* wallcycle_stop(wcycle, ewcPME_FFT); */
+            }
+            where();
+
+            /* solve in k-space for our local cells */
+            if (thread == 0)
+            {
+                /* wallcycle_start(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME)); */
+            }
+            loop_count = solve_pme_yzx(pme, cfftgrid, ewaldcoeff_q,
+                                       box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
+                                       flags & GMX_PME_CALC_ENER_VIR,
+                                       pme->nthread, thread);
+
+            if (thread == 0)
+            {
+                /* wallcycle_stop(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME)); */
+                where();
+                inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
+            }
+        }
+
+        if (flags & (GMX_PME_CALC_F | GMX_PME_CALC_POT))
+        {
+            /* do 3d-invfft */
+            if (thread == 0)
+            {
+                where();
+                /* wallcycle_start(wcycle, ewcPME_FFT); */
+            }
+            gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL,
+                                       thread, 0); /* wcycle); */
+            if (thread == 0)
+            {
+                /* wallcycle_stop(wcycle, ewcPME_FFT); */
+
+                where();
+
+                if (pme->nodeid == 0)
+                {
+                    ntot  = pme->nkx*pme->nky*pme->nkz;
+                    npme  = ntot*log((real)ntot)/log(2.0);
+                    inc_nrnb(nrnb, eNR_FFT, 2*npme);
+                }
+
+                /* Note: this wallcycle region is closed below
+                   outside an OpenMP region, so take care if
+                   refactoring code here. */
+                /* wallcycle_start(wcycle, ewcPME_SPREADGATHER); */
+            }
+
+            copy_fftgrid_to_pmegrid(pme, fftgrid, grid, 0, pme->nthread, thread);
+        }
+    }
+    /* End of thread parallel section.
+     * With MPI we have to synchronize here before gmx_sum_qgrid_dd.
+     */
+
+    if (flags & GMX_PME_CALC_POT)
+    {
+        /* distribute local grid to all nodes */
+#ifdef GMX_MPI
+        if (pme->nnodes > 1)
+        {
+            gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_BACKWARD);
+        }
+#endif
+        where();
+
+        unwrap_periodic_pmegrid(pme, grid);
+
+        /* interpolate the potential at our local atoms */
+
+        where();
+
+        /* Call the gather once: it takes no thread or spline argument, so
+         * the former per-thread OpenMP loop only repeated the identical
+         * call, with every thread handed the same output array pot at
+         * the same time. */
+        gather_pot_bsplines_dftb(pme, grid, atc, pot);
+
+        where();
+
+        inc_nrnb(nrnb, eNR_GATHERFBSP,
+                 pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+        /* Note: this wallcycle region is opened above inside an OpenMP
+           region, so take care if refactoring code here. */
+        /* wallcycle_stop(wcycle, ewcPME_SPREADGATHER); */
+    }
+
+    if (flags & GMX_PME_CALC_F)
+    {
+        /* distribute local grid to all nodes */
+#ifdef GMX_MPI
+        if (pme->nnodes > 1)
+        {
+            gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_BACKWARD);
+        }
+#endif
+        where();
+
+        unwrap_periodic_pmegrid(pme, grid);
+
+        /* interpolate forces for our local atoms */
+
+        where();
+
+        /* If we are running without parallelization,
+         * atc->f is the actual force array, not a buffer...
+         *
+         * QM/MM - DFTB: oh yes, clear it!
+         * 3rd argument to gather_f_bsplines is TRUE
+         */
+
+#pragma omp parallel for num_threads(pme->nthread) schedule(static)
+        for (thread = 0; thread < pme->nthread; thread++)
+        {
+            gather_f_bsplines(pme, grid, TRUE, atc,
+                              &atc->spline[thread], 1.);
+        }
+
+        where();
+
+        inc_nrnb(nrnb, eNR_GATHERFBSP,
+                 pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+        /* Note: this wallcycle region is opened above inside an OpenMP
+           region, so take care if refactoring code here. */
+        /* wallcycle_stop(wcycle, ewcPME_SPREADGATHER); */
+    }
+
+    if (flags & GMX_PME_CALC_ENER_VIR)
+    {
+        /* This should only be called on the master thread
+         * and after the threads have synchronized.
+         */
+        get_pme_ener_vir_q(pme, pme->nthread, &energy_AB[0], vir_AB[0]);
+    }
+    /* formerly - grid_index loop ended here */
+
+    if ((flags & GMX_PME_CALC_F) && pme->nnodes > 1)
+    {
+        /* wallcycle_start(wcycle, ewcPME_REDISTXF); */
+        for (d = 0; d < pme->ndecompdim; d++)
+        {
+            atc = &pme->atc[d];
+            if (d == pme->ndecompdim - 1)
+            {
+                n_d = homenr;
+                f_d = f + start;
+            }
+            else
+            {
+                n_d = pme->atc[d+1].n;
+                f_d = pme->atc[d+1].f;
+            }
+            if (DOMAINDECOMP(cr))
+            {
+                dd_pmeredist_f(pme, atc, n_d, f_d,
+                               d == pme->ndecompdim-1 && pme->bPPnode);
+            }
+        }
+
+        /* wallcycle_stop(wcycle, ewcPME_REDISTXF); */
+    }
+    where();
+
+    if (flags & GMX_PME_CALC_ENER_VIR)
+    {
+        *energy_q = energy_AB[0];
+        m_add(vir_q, vir_AB[0], vir_q);
+        if (debug)
+        {
+            fprintf(debug, "Electrostatic PME mesh energy: %g\n", *energy_q);
+        }
+    }
+    return 0;
+}
+
+int gmx_pme_do_dftb_mm_forces(gmx_pme_t pme,                 /* PME state; only grid index 0 is used */
+                              int start,       int homenr,   /* offset into chargeA and f; local atom count */
+                              rvec x[],        rvec f[],     /* positions; array handed to the gather stage */
+                              real *chargeA,                 /* coefficients that are spread on the grid */
+                              matrix box, t_commrec *cr,
+                              int  maxshift_x, int maxshift_y,
+                              t_nrnb *nrnb,    /* gmx_wallcycle_t wcycle, */
+                              matrix vir_q,      real ewaldcoeff_q,
+                              real *energy_q,                /* written only with GMX_PME_CALC_ENER_VIR */
+                              int flags)                     /* GMX_PME_* bit mask selecting the stages run */
+{
+    int     d, i, j, k, ntot, npme;   /* i, j, k are not referenced below */
+    int     nx, ny, nz;               /* not referenced below */
+    int     n_d, local_ny;            /* local_ny is not referenced below */
+    pme_atomcomm_t *atc = NULL;
+    pmegrids_t *pmegrid = NULL;
+    real    *grid       = NULL;
+    real    *ptr;                     /* not referenced below */
+    rvec    *x_d, *f_d;               /* x_d is not referenced below */
+    real    *coefficient = NULL;
+    real    energy_AB[4];             /* only element 0 is used */
+    matrix  vir_AB[4];                /* only element 0 is used */
+    gmx_parallel_3dfft_t pfft_setup;
+    real *  fftgrid;
+    t_complex * cfftgrid;
+    int     thread;
+
+    assert(pme->nnodes > 0);
+    assert(pme->nnodes == 1 || pme->ndecompdim > 0);
+
+    if (pme->nnodes > 1)
+    {
+        /* Several PME ranks: size the redistribution buffer and set the shifts */
+        atc      = &pme->atc[0];
+        atc->npd = homenr;
+        if (atc->npd > atc->pd_nalloc)
+        {
+            atc->pd_nalloc = over_alloc_dd(atc->npd);
+            srenew(atc->pd, atc->pd_nalloc);
+        }
+        for (d = pme->ndecompdim-1; d >= 0; d--)
+        {
+            atc           = &pme->atc[d];
+            atc->maxshift = (atc->dimind == 0 ? maxshift_x : maxshift_y);
+        }
+    }
+    else
+    {
+        /* Single PME rank: work directly on the caller's x and f arrays */
+        atc = &pme->atc[0];
+        /* This could be necessary for TPI */
+        pme->atc[0].n = homenr;
+        if (DOMAINDECOMP(cr))
+        {
+            pme_realloc_atomcomm_things(atc);
+        }
+        atc->x = x;
+        atc->f = f;
+    }
+
+    m_inv_ur0(box, pme->recipbox);
+
+    /*
+     * QM/MM - DFTB: only one grid, therefore grid_index==0
+     * and this is not present explicitly (0 is hardcoded instead)
+     */
+
+    /* Unpack structure */
+    pmegrid    = &pme->pmegrid[0];
+    fftgrid    = pme->fftgrid[0];
+    cfftgrid   = pme->cfftgrid[0];
+    pfft_setup = pme->pfft_setup[0];
+    coefficient= chargeA + start;
+
+    grid = pmegrid->grid.grid;
+
+    if (debug)
+    {
+        fprintf(debug, "PME: number of ranks = %d, rank = %d\n",
+                cr->nnodes, cr->nodeid);
+        fprintf(debug, "Grid = %p\n", (void*)grid);
+        if (grid == NULL)   /* note: this check runs only when debug output is on */
+        {
+            gmx_fatal(FARGS, "No grid!");
+        }
+    }
+    where();
+
+    if (pme->nnodes == 1)
+    {
+        atc->coefficient = coefficient;
+    }
+    else
+    {
+        /* wallcycle_start(wcycle, ewcPME_REDISTXF); */
+        do_redist_pos_coeffs(pme, cr, start, homenr, TRUE, x, coefficient);
+        where();
+
+        /* wallcycle_stop(wcycle, ewcPME_REDISTXF); */
+    }
+
+    if (debug)
+    {
+        fprintf(debug, "Rank= %6d, pme local particles=%6d\n",
+                cr->nodeid, atc->n);
+    }
+
+    if (flags & GMX_PME_SPREAD)
+    {
+        /* wallcycle_start(wcycle, ewcPME_SPREADGATHER); */
+
+        /* Spread the coefficients on a grid */
+        spread_on_grid(pme, &pme->atc[0], pmegrid, TRUE, TRUE, fftgrid, TRUE, 0);
+
+        inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
+        inc_nrnb(nrnb, eNR_SPREADBSP,
+                 pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
+
+        if (!pme->bUseThreads)
+        {
+            wrap_periodic_pmegrid(pme, grid);
+
+            /* sum contributions to local grid from other nodes */
+#ifdef GMX_MPI
+            if (pme->nnodes > 1)
+            {
+                gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_FORWARD);
+                where();
+            }
+#endif
+
+            copy_pmegrid_to_fftgrid(pme, grid, fftgrid, 0);
+        }
+
+        /* wallcycle_stop(wcycle, ewcPME_SPREADGATHER); */
+
+        /*
+           dump_local_fftgrid(pme,fftgrid);
+           exit(0);
+         */
+    }
+
+    /* Here we start a large thread parallel region */
+#pragma omp parallel num_threads(pme->nthread) private(thread)
+    {
+        thread = gmx_omp_get_thread_num();
+        if (flags & GMX_PME_SOLVE)
+        {
+            int loop_count;
+
+            /* do 3d-fft */
+            if (thread == 0)
+            {
+                /* wallcycle_start(wcycle, ewcPME_FFT); */
+            }
+            gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX,
+                                       thread, 0); /* wcycle); */
+            if (thread == 0)
+            {
+                /* wallcycle_stop(wcycle, ewcPME_FFT); */
+            }
+            where();
+
+            /* solve in k-space for our local cells */
+            if (thread == 0)
+            {
+                /* wallcycle_start(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME)); */
+            }
+            loop_count = solve_pme_yzx(pme, cfftgrid, ewaldcoeff_q,
+                                       box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
+                                       flags & GMX_PME_CALC_ENER_VIR,
+                                       pme->nthread, thread);
+
+            if (thread == 0)
+            {
+                /* wallcycle_stop(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME)); */
+                where();
+                inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
+            }
+        }
+
+        if (flags & GMX_PME_CALC_F)
+        {
+            /* do 3d-invfft */
+            if (thread == 0)
+            {
+                where();
+                /* wallcycle_start(wcycle, ewcPME_FFT); */
+            }
+            gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL,
+                                       thread, 0); /* wcycle); */
+            if (thread == 0)
+            {
+                /* wallcycle_stop(wcycle, ewcPME_FFT); */
+
+                where();
+
+                if (pme->nodeid == 0)
+                {
+                    /* ntot and npme are shared, but written by thread 0 only */
+                    ntot  = pme->nkx*pme->nky*pme->nkz;
+                    npme  = ntot*log((real)ntot)/log(2.0);   /* ntot * log2(ntot) */
+                    inc_nrnb(nrnb, eNR_FFT, 2*npme);
+                }
+
+                /* Note: this wallcycle region is closed below
+                   outside an OpenMP region, so take care if
+                   refactoring code here. */
+                /* wallcycle_start(wcycle, ewcPME_SPREADGATHER); */
+            }
+
+            copy_fftgrid_to_pmegrid(pme, fftgrid, grid, 0, pme->nthread, thread);
+        }
+    }
+    /* End of thread parallel section.
+     * With MPI we have to synchronize here before gmx_sum_qgrid_dd.
+     */
+
+    if (flags & GMX_PME_CALC_F)
+    {
+        /* distribute local grid to all nodes */
+#ifdef GMX_MPI
+        if (pme->nnodes > 1)
+        {
+            gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_BACKWARD);
+        }
+#endif
+        where();
+
+        unwrap_periodic_pmegrid(pme, grid);
+
+        /* interpolate forces for our local atoms */
+
+        where();
+
+        /* If we are running without parallelization,
+         * atc->f is the actual force array, not a buffer...
+         *
+         * QM/MM - DFTB: the original note asks for atc->f to be cleared, but
+         * NOTE(review): the call below has no clear flag - confirm in the callee.
+         */
+
+#pragma omp parallel for num_threads(pme->nthread) schedule(static)
+        for (thread = 0; thread < pme->nthread; thread++)
+        {
+            gather_fdivq_bsplines_dftb(pme, grid, atc,
+                              &atc->spline[thread]);
+        }
+
+        where();
+
+        inc_nrnb(nrnb, eNR_GATHERFBSP,
+                 pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+        /* Note: this wallcycle region is opened above inside an OpenMP
+           region, so take care if refactoring code here. */
+        /* wallcycle_stop(wcycle, ewcPME_SPREADGATHER); */
+    }
+
+    if (flags & GMX_PME_CALC_ENER_VIR)
+    {
+        /* This should only be called on the master thread
+         * and after the threads have synchronized.
+         */
+        get_pme_ener_vir_q(pme, pme->nthread, &energy_AB[0], vir_AB[0]);
+    }
+    /* formerly - grid_index loop ended here */
+
+    if ((flags & GMX_PME_CALC_F) && pme->nnodes > 1)
+    {
+        /* wallcycle_start(wcycle, ewcPME_REDISTXF); */
+        for (d = 0; d < pme->ndecompdim; d++)
+        {
+            atc = &pme->atc[d];
+            if (d == pme->ndecompdim - 1)
+            {
+                /* last decomposition dimension: target is the caller's f array */
+                n_d = homenr;
+                f_d = f + start;
+            }
+            else
+            {
+                /* otherwise the target is the buffer of the next dimension */
+                n_d = pme->atc[d+1].n;
+                f_d = pme->atc[d+1].f;
+            }
+            if (DOMAINDECOMP(cr))
+            {
+                dd_pmeredist_f(pme, atc, n_d, f_d,
+                               d == pme->ndecompdim-1 && pme->bPPnode);
+            }
+        }
+
+        /* wallcycle_stop(wcycle, ewcPME_REDISTXF); */
+    }
+    where();
+
+    if (flags & GMX_PME_CALC_ENER_VIR)
+    {
+        *energy_q = energy_AB[0];          /* energy is overwritten ... */
+        m_add(vir_q, vir_AB[0], vir_q);    /* ... while the virial is added to vir_q */
+        if (debug)
+        {
+            fprintf(debug, "Electrostatic PME mesh energy: %g\n", *energy_q);
+        }
+    }
+    return 0;   /* there is no failure path: the return value is always 0 */
 }
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_broyden.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_broyden.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_broyden.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_broyden.c	2012-09-13 14:25:26.000000000 +0200
@@ -0,0 +1,238 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+// #include"charge_transfer.h"
+#include"qm_dftb.h"
+//   WEIGHTING FACTOR FOR THE ZEROTH ITERATION
+#define SQRW0 (0.0001)
+
+/* Invert in place the leading isize x isize part of the symmetric matrix b
+ * (uplo = 'U', leading dimension IMATSZ_BROYDEN) with LAPACK dsytrf + dsytri.
+ * Returns the LAPACK info value of the first failing step, or 0 on success. */
+static int inverse(int isize, double *b, double *work, long *ipiv)
+{
+  extern void dsytrf_(char *, long *, double *, long *, long *, double *, long *, long *);
+  extern void dsytri_(char *, long *, double *, long *, long *, double *, long *);
+  static char triangle = 'U';
+  static long lda = IMATSZ_BROYDEN, order, status;
+  order = (long) isize;
+  dsytrf_(&triangle, &order, b, &lda, ipiv, work, &lda, &status);   /* factorize */
+  if (status == 0)
+    dsytri_(&triangle, &order, b, &lda, ipiv, work, &status);       /* invert from the factors */
+  return status;
+}
+
+void broyden(int niter, double alpha, int jtop, double *vecin, double *vecout, dftb_broyden_t *arrays)
+{
+  double amix, dfnorm, fnorm, fac1, fac2, aij, cmj, gmi, wtmp;
+  int lastit, iter, i, j, k, lm, ln, ip, ivsiz;
+  static double uamix;   // mixing factor remembered between calls
+  static int ilastit;    // number of stored history entries, remembered between calls
+
+  // printf("START BROYDEN - iter %d\n", niter);
+  // for (k=0; k<jtop; k++) {
+  //   printf("%3d%10.6f%10.6f\n", k+1, vecin[k], vecout[k]);
+  // }
+
+  iter = niter;
+  if (niter >= MAXITER_BROYDEN)
+    iter = iter % MAXITER_BROYDEN;   // iter == 0 below restarts with simple mixing
+  /* if (iter == 0)
+    return; */
+
+  // ++++++ SET UP THE VECTOR OF THE CURRENT ITERATION FOR MIXING ++++++
+  // vector[k][0] holds the input value, vector[k][1] the output value.
+  // FOR THIS METHOD WE HAVE ONLY SAVED INPUT/OUTPUT CHG. DENSITIES,
+  for (k=0; k<jtop; k++) {
+    arrays->vector[k][0] = vecin[k];
+    arrays->vector[k][1] = vecout[k];
+  }
+  // ++++++ END OF PROGRAM SPECIFIC LOADING OF VECTOR FROM MAIN ++++++++
+  // 
+  // IVSIZ IS THE LENGTH OF THE VECTOR
+ 
+  ivsiz = jtop;
+
+  // niter: iteration number (reduced modulo MAXITER_BROYDEN); alpha: simple-mixing factor;
+  // jtop: vector length; vecin is overwritten with the new mix, vecout is only read.
+  // *******************  BEGIN BROYDEN'S METHOD  **********************
+  // arrays: work space and iteration history carried between calls.
+
+  //     F:  THE DIFFERENCE OF PREVIOUS OUTPUT AND INPUT VECTORS
+  // DUMVI:  A DUMMY VECTOR, HERE IT IS THE PREVIOUS INPUT VECTOR
+  if (iter == 0) {
+    // IF THIS IS THE FIRST ITERATION, THEN LOAD
+    // F=VECTOR(OUT)-VECTOR(IN) AND VECTOR(IN)
+    // PRINT*,'SIMPLE MIXING THIS ITERATION'
+    lastit = 0;
+    amix = alpha;      // alpha is only taken over here; later calls reuse uamix
+    uamix = amix;
+    ilastit = lastit;  // history is emptied
+    for (k=0; k<ivsiz; k++) {
+      arrays->f[k] = arrays->vector[k][1] - arrays->vector[k][0];
+      arrays->unit31[k][0] = arrays->f[k];
+      arrays->unit31[k][1] = arrays->vector[k][0];
+      // SINCE WE ARE ON THE FIRST ITERATION, SIMPLY MIX THE VECTOR.
+      arrays->dumvi[k] = arrays->vector[k][0] + amix * arrays->f[k];
+    }
+  } else {
+    // ALL THIS IS PERFORMED IN FURTHER ITERATIONS
+    amix = uamix;
+    lastit = ilastit;
+    // FOR I-TH ITER.,DFNORM IS ( F(I) MINUS F(I-1) ), USED FOR NORMALIZATION
+    dfnorm = 0.0;
+    fnorm = 0.0;
+    for (k=0; k<ivsiz; k++) {
+       // ALPHA (OR AMIX) IS SIMPLE MIXING PARAMETER
+      arrays->dumvi[k] = arrays->vector[k][0] - arrays->unit31[k][1];   // change of the input vector
+      arrays->df[k] = arrays->vector[k][1] - arrays->vector[k][0] - arrays->unit31[k][0];   // change of F
+      arrays->f[k] = arrays->vector[k][1] - arrays->vector[k][0];
+      dfnorm += arrays->df[k] * arrays->df[k];
+      fnorm += arrays->f[k] * arrays->f[k];
+    }
+    dfnorm = sqrt(dfnorm);
+    fnorm = sqrt(fnorm);
+
+    fac2 = 1.0 / dfnorm;   // NOTE(review): no guard against dfnorm == 0 (F unchanged since the last call)
+    fac1 = amix * fac2;
+
+    for (k=0; k<ivsiz; k++) {
+      arrays->ui[k] = fac1 * arrays->df[k] + fac2 * arrays->dumvi[k];
+      arrays->vti[k] = fac2 * arrays->df[k];
+    }
+
+    // *********** CALCULATION OF COEFFICIENT MATRICES *************
+    // ***********    AND THE SUM FOR CORRECTIONS      *************
+    // 
+    // RECALL: A(I,J) IS A SYMMETRIC MATRIX
+    //       : B(I,J) IS THE INVERSE OF [ W0**2 I + A ]
+    //
+    lastit++;
+
+    // DUMVI IS THE U(OF I) AND T1 IS THE VT(OF I)
+    // FROM THE PREVIOUS ITERATIONS
+    if (lastit > 1) {
+      for (j=0; j<lastit-1; j++) {
+        aij = cmj = 0.0;
+	for (k=0; k<ivsiz; k++) {
+	  arrays->dumvi[k] = arrays->unit32[k][0][j];   // scratch copy; dumvi is recomputed before its next use
+	  arrays->t1[k] = arrays->unit32[k][1][j];
+	  cmj += arrays->t1[k] * arrays->f[k];
+	  aij += arrays->t1[k] * arrays->vti[k];
+	}
+	arrays->a[lastit-1][j] = arrays->a[j][lastit-1] = aij;
+	//printf("a[%d][%d] = %f\n", j, lastit-1, aij);
+	arrays->cm[j] = cmj;
+      }
+    }
+
+    // diagonal element of A and last element of CM, from the current VTI
+    aij = cmj = 0.0;
+    for (k=0; k<ivsiz; k++) {
+      cmj += arrays->vti[k] * arrays->f[k];
+      aij += arrays->vti[k] * arrays->vti[k];
+    }
+    arrays->a[lastit-1][lastit-1] = aij;
+    //printf("a[%d][%d] = %f\n", lastit-1, lastit-1, aij);
+    arrays->cm[lastit-1] = cmj;
+
+    // append the current U and VT to the history (slot lastit-1)
+    for (k=0; k<ivsiz; k++) {
+      arrays->unit32[k][0][lastit-1] = arrays->ui[k];
+      arrays->unit32[k][1][lastit-1] = arrays->vti[k];
+    }
+
+    // THE WEIGHTING FACTORS FOR EACH ITERATION HAVE BEEN CHOSEN
+    // EQUAL TO ONE OVER THE R.M.S. ERROR. THIS NEED NOT BE THE CASE.
+    if (fnorm > 1.0e-7)
+      wtmp = 0.01 / fnorm;
+    else
+      wtmp = 1.0e5;        // value used when fnorm is at most 1.0e-7
+    if (wtmp < 1.0)
+      wtmp = 1.0;          // the weight is never below 1
+    arrays->w[lastit-1] = wtmp;
+    // WRITE(66,'(''  WEIGHTING SET =  '',E12.6)')WTMP
+
+    // WITH THE CURRENT ITERATIONS F AND VECTOR CALCULATED,
+    // WRITE THEM TO UNIT 31 FOR USE LATER.
+    uamix = amix;
+    ilastit = lastit;
+    for (k=0; k<ivsiz; k++) {
+      arrays->unit31[k][0] = arrays->f[k];
+      arrays->unit31[k][1] = arrays->vector[k][0];
+    }
+
+    // SET UP AND CALCULATE BETA MATRIX
+    for (lm=0; lm < lastit; lm++) {
+      for (ln=0; ln < lastit; ln++)
+        arrays->b_lapack[ln * IMATSZ_BROYDEN + lm] 
+		= arrays->b[ln][lm] 
+		= arrays->a[ln][lm] * arrays->w[ln] * arrays->w[lm];
+      arrays->b_lapack[lm * IMATSZ_BROYDEN + lm] 
+	      = arrays->b[lm][lm] 
+	      = SQRW0 + arrays->a[lm][lm] * arrays->w[lm] * arrays->w[lm];   // diagonal overwritten with the SQRW0 shift
+    }
+
+    // print out the matrix
+    // printf("w vector:\n");
+    //   for (ln=0; ln<lastit; ln++)
+    //     printf("%9.5f", arrays->w[ln]);
+    //   printf("\n");
+    // printf("beta matrix:\n");
+    // for (lm=0; lm<lastit; lm++) {
+    //   for (ln=0; ln<lastit; ln++)
+    //     printf("%9.5f", arrays->b[lm][ln]);
+    //   printf("\n");
+    // }
+
+    // INVERT THE MATRIX USING LAPACK, INSTEAD OF CALL INVERSE(D,B,LASTM1)
+    if (inverse(lastit, arrays->b_lapack, arrays->work, arrays->ipiv) != 0) {
+      printf("Broyden: error in matrix inversion\n");
+      exit(-1);   // a failed inversion terminates the whole program
+    }
+
+    // copy the inverse back into b, mirroring one stored triangle of b_lapack
+    for (lm=0; lm<lastit; lm++)
+      for (ln=0; ln<lastit; ln++)
+        arrays->b[lm][ln] = lm < ln ? 
+		arrays->b_lapack[ln * IMATSZ_BROYDEN + lm]:
+	       	arrays->b_lapack[lm * IMATSZ_BROYDEN + ln];
+
+    // print out the inverse
+    // printf("inverse of beta matrix:\n");
+    // for (lm=0; lm<lastit; lm++) {
+    //   for (ln=0; ln<lastit; ln++)
+    //     printf("%9.5f", arrays->b[lm][ln]);
+    //   printf("\n");
+    // }
+
+    // calculate the vector for the new iteration
+    for (k=0; k<ivsiz; k++)
+      arrays->dumvi[k] = arrays->vector[k][0] + amix * arrays->f[k];   // start from the simple-mixing result
+
+    // subtract one correction term per stored history entry
+    for (i=0; i<lastit; i++) {
+      for (k=0; k<ivsiz; k++) {
+        arrays->ui[k] = arrays->unit32[k][0][i];
+        arrays->vti[k] = arrays->unit32[k][1][i];
+      }
+      gmi = 0.0;
+      for (ip=0; ip<lastit; ip++)
+        gmi += arrays->cm[ip] * arrays->b[ip][i] * arrays->w[ip];
+      for (k=0; k<ivsiz; k++) {
+        arrays->dumvi[k] -= gmi * arrays->ui[k] * arrays->w[i];
+	// printf("%9.5f%9.5f%9.5f%9.5f\n", arrays->dumvi[k], gmi, arrays->ui[k], arrays->w[i]);
+      }
+    }
+  }
+  //     END OF THE CALCULATION OF DUMVI, THE NEW VECTOR
+  //
+  //**********  THE END OF THE BROYDEN METHOD **************
+  //
+  //+++++ PROGRAM SPECIFIC CODE OF RELOADING ARRAYS +++++++++
+  // NOTE(review): uamix/ilastit are function-level statics, so all callers share one history counter.
+  // NEED TO UNLOAD THE NEW VECTOR INTO THE APPROPRIATE ARRAYS.
+  for (k=0; k<jtop; k++)
+    vecin[k] = arrays->dumvi[k];
+  // printf("END BROYDEN - iter %d\n", niter);
+  // for (k=0; k<jtop; k++) {
+  //   printf("%3d%10.6f%10.6f\n", k+1, vecin[k], vecout[k]);
+  // }
+  return;
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb.c	2015-02-20 16:51:53.285815818 +0100
@@ -0,0 +1,614 @@
+/*************************
+ * Charge transfer in DNA
+ * Tomas Kubar
+ *************************/
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include "qm_dftb.h"
+
+// #ifdef GMX_MPI
+// #define PRINTF(...) if (ct_mpi_rank==0) printf(__VA_ARGS__)
+// #else
+#define PRINTF(...) printf(__VA_ARGS__)
+// #endif
+
+/****************************
+ * INITIALIZE THE DFTB CODE *
+ ****************************/
+
+void init_dftb(t_QMrec *qm, t_MMrec *mm, t_inputrec *ir)
+{
+  dftb_t *dftb=NULL;
+  char slko_path[168], slko_separator[20], slko_suffix[20], cdko_path[144], *env;
+  gmx_bool slko_lowercase;
+
+  char
+    periodic_system[37][3]={"XX","H", "He",
+                                 "Li","Be","B", "C", "N", "O", "F", "Ne",
+                                 "Na","Mg","Al","Si","P", "S", "Cl","Ar",
+                                 "K", "Ca","Sc","Ti","V", "Cr","Mn","Fe","Co",
+                                 "Ni","Cu","Zn","Ga","Ge","As","Se","Br","Kr"};
+  //const char *suffix = "-c.spl";
+  double
+    atom_masses[37]={ 0.000,  1.008,  4.003,  
+                              6.941,  9.012, 10.811, 12.011, 14.007, 15.999, 18.998, 20.180, 
+                             22.990, 24.305, 26.982, 28.086, 30.974, 32.065, 35.453, 39.948, 
+                             39.098, 40.078, 44.956, 47.867, 50.942, 51.996, 54.938, 55.845, 58.933, 
+                             58.693, 63.546, 65.409, 69.723, 72.64 , 74.922, 78.96 , 79.904, 83.798};
+		      /* source: Pure Appl. Chem., Vol. 78, No. 11, pp. 2051–2066, 2006.
+		       * (IUPAC) doi:10.1351/pac200678112051
+		       */
+
+  char filename[216], charstring[128], elem1[3], elem2[3];
+  int i, j, k, l, izpj, counter, found_element;
+  double espin, qzeroh[3], uhubbh[3], mass;
+  FILE *f;
+
+  PRINTF("Initializing SCC-DFTB\n");
+
+  if ((env = getenv("GMX_DFTB_SLKO_PATH")) != NULL) {
+    strcpy(slko_path, env);
+  } else {
+    strcpy(slko_path, ir->QMdftbslkopath);
+  }
+  PRINTF("The DFTB SLKO files will be sought in directory %s\n", slko_path);
+
+  slko_lowercase = FALSE;
+  if ((env = getenv("GMX_DFTB_SLKO_LOWERCASE")) != NULL) {
+    if (env[0] == '1' || env[0] == 'T' || env[0] == 'Y' || env[0] == 't' || env[0] == 'y')
+      slko_lowercase = TRUE;
+    else
+      slko_lowercase = FALSE;
+  } else {
+    slko_lowercase = ir->QMdftbslkolowercase;
+  }
+  PRINTF("   ... the element names are assumed to be %s\n", slko_lowercase ? "lowercase" : "uppercase (first character)");
+
+  if ((env = getenv("GMX_DFTB_SLKO_SEPARATOR")) != NULL) {
+    strcpy(slko_separator, env);
+  } else {
+    strcpy(slko_separator, ir->QMdftbslkoseparator);
+  }
+  PRINTF("   ... and separated by character(s): %s\n", slko_separator[0] ? slko_separator : "(none)");
+
+  if ((env = getenv("GMX_DFTB_SLKO_SUFFIX")) != NULL) {
+    strcpy(slko_suffix, env);
+  } else {
+    strcpy(slko_suffix, ir->QMdftbslkosuffix);
+  }
+  PRINTF("   ... and the file name is terminated by character(s): %s\n", slko_suffix[0] ? slko_suffix : "(none)");
+
+  PRINTF("dftb = %p\n", qm->dftb);
+  snew(qm->dftb, 1);
+  PRINTF("dftb = %p\n", qm->dftb);
+  dftb = qm->dftb;
+  PRINTF("dftb = %p\n", dftb);
+
+  if ((env = getenv("GMX_DFTB_TELEC")) != NULL) {
+    dftb->phase1.telec = atof(env);
+  } else {
+    dftb->phase1.telec = qm->dftbtelec;
+  }
+  PRINTF("The electronic temperature for Fermi-Dirac distribution in SCC-DFTB has been set to %f K\n", dftb->phase1.telec);
+
+  dftb->atoms = qm->nrQMatoms;
+  dftb->extcharges = mm->nrMMatoms;
+  PRINTF("QM/MM with SCC-DFTB: there are %d QM atoms and %d MM atoms\n", dftb->atoms, dftb->extcharges);
+
+  if ((env = getenv("GMX_DFTB_QM_COORD")) != NULL) {
+    dftb->output_qm_freq = atoi(env);
+    PRINTF("The QM coordinates (XYZQ) will be saved in file qm.qxyz every %d steps.\n", dftb->output_qm_freq);
+  } else {
+    dftb->output_qm_freq = -1;
+  }
+  if ((env = getenv("GMX_DFTB_MM_COORD")) != NULL) {
+    dftb->output_mm_freq = atoi(env);
+    PRINTF("The MM coordinates (XYZQ) will be saved in file mm.qxyz every %d steps.\n", dftb->output_mm_freq);
+  } else {
+    dftb->output_mm_freq = -1;
+  }
+
+  /* Assign the atom numbers */
+    snew(dftb->atom, dftb->atoms);
+    snew(dftb->atomtype, dftb->atoms);
+    snew(dftb->phase1.mass, dftb->atoms);
+
+  /* determine the numbers of electrons and atom types (in arrays) */
+    dftb->phase1.nel = qm->nelectrons;
+    dftb->phase1.nn = dftb->atoms;
+
+  // cut-off and frequency for the manual QM/MM neighborsearching
+  dftb->rcoulomb_pme = ir->rcoulomb;
+  dftb->nstlist_pme = ir->nstlist;
+  dftb->lastlist_pme = - dftb->nstlist_pme; // make sure that the neighborsearching is done in the first step of simulation
+  if ((env = getenv("GMX_DFTB_CUTOFF")) != NULL) {
+    dftb->cutoff_qmmm = 1;
+    dftb->rlist_pme = dftb->rcoulomb_pme + QMMM_DFTB_SWITCH + QMMM_DFTB_LIST;
+    PRINTF("QM/MM with SCC-DFTB: cut-off with rcoulomb = %f nm, add. switch region of %f nm, add. nbsearch region of %f, nstlist = %d\n",
+      dftb->rcoulomb_pme, QMMM_DFTB_SWITCH, QMMM_DFTB_LIST, dftb->nstlist_pme);
+  } else {
+    if ((env = getenv("GMX_DFTB_RFIELD")) != NULL) {
+      dftb->cutoff_qmmm = 2;
+      dftb->rcoulomb_pme += QMMM_DFTB_SWITCH;
+      dftb->rlist_pme     = dftb->rcoulomb_pme + QMMM_DFTB_LIST;
+      PRINTF("QM/MM with SCC-DFTB: reaction field with eps=infinity, rcoulomb = %f nm, add. nbsearch region of %f, nstlist = %d\n",
+        dftb->rcoulomb_pme, QMMM_DFTB_LIST, dftb->nstlist_pme);
+    } else {
+      if ((env = getenv("GMX_DFTB_SHIFT")) != NULL) {
+        dftb->cutoff_qmmm = 3;
+        dftb->rcoulomb_pme += QMMM_DFTB_SWITCH;
+        dftb->rlist_pme     = dftb->rcoulomb_pme + QMMM_DFTB_LIST;
+        PRINTF("QM/MM with SCC-DFTB: shifted cut-off with rcoulomb = %f nm, add. nbsearch region of %f, nstlist = %d\n",
+          dftb->rcoulomb_pme, QMMM_DFTB_LIST, dftb->nstlist_pme);
+      } else {
+        dftb->cutoff_qmmm = 0;
+        dftb->rlist_pme = dftb->rcoulomb_pme;
+        PRINTF("QM/MM with SCC-DFTB: PME with rcoulomb = %f nm, nstlist = %d\n", dftb->rcoulomb_pme, dftb->nstlist_pme);
+      }
+    }
+  }
+
+  // PME surface correction or tin-foil boundary conditions?
+  if (! dftb->cutoff_qmmm) {
+    if ((env = getenv("GMX_DFTB_SURFACE_CORRECTION")) != NULL) {
+      dftb->surf_corr_pme = atoi(env);
+      PRINTF("QM/MM with SCC-DFTB: surface correction applied with epsilon_r = %d\n", dftb->surf_corr_pme = atoi(env));
+    } else {
+      dftb->surf_corr_pme = 0;
+      PRINTF("QM/MM with SCC-DFTB: no surface correction (tin-foil boundary conditions)\n");
+    }
+  }
+
+  PRINTF("DFTB_MAXTYPES = %d\n", DFTB_MAXTYPES);
+  dftb->elements = 0;
+  mass = 0.0;
+  for (j=0; j<dftb->atoms; j++) {
+    dftb->atom[j] = qm->indexQM[j];
+    found_element = 0;
+    for (k=0; k<dftb->elements; k++) {
+      if (qm->atomicnumberQM[j] == dftb->element[k]) {
+        dftb->atomtype[j] = k;
+        found_element = 1;
+      }
+    }
+    if (0 == found_element) {
+      dftb->atomtype[j] = dftb->elements;
+      dftb->element[dftb->elements] = qm->atomicnumberQM[j];
+      PRINTF("SCC-DFTB: found new chem. element - first atom %5d - at. number %d, SCC-DFTB index %d, element %s)\n",
+        dftb->atom[j]+1, qm->atomicnumberQM[j], dftb->atomtype[j], periodic_system[dftb->element[dftb->atomtype[j]]]);
+      dftb->elements++;
+      if (dftb->elements > DFTB_MAXTYPES) {
+        PRINTF("SCC-DFTB: Too many chemical elements!\n");
+        PRINTF("          Increase DFTB_MAXTYPES and recompile!\n");
+        exit(-1);
+      }
+    }
+    dftb->phase1.mass[j] = atom_masses[dftb->element[dftb->atomtype[j]]];
+    mass += dftb->phase1.mass[j];
+    //PRINTF("%5d (%5s, type %d, element %s)\n", dftb->atom[j], (*(top_global->moltype->atoms.atomname[dftb->atom[j]])), dftb->atomtype[j]+1, periodic_system[dftb->element[dftb->atomtype[j]+1]]);
+    PRINTF("%5d (type %d, element %s)\n", dftb->atom[j], dftb->atomtype[j]+1, periodic_system[dftb->element[dftb->atomtype[j]]]);
+  }
+  dftb->phase1.inv_tot_mass = 1.0 / mass;
+  PRINTF("\n");
+
+  /* Deal with the external charges */
+  snew(dftb->extcharge, dftb->extcharges);
+ 
+  /* assign the number of shells in atom types */
+  for (j=0; j<dftb->elements; j++) {
+    if (dftb->element[j] <=  2) {dftb->lmax[j] = 1; PRINTF("element no. %d: at. number %d, lmax %d\n", j, dftb->element[j], dftb->lmax[j]); continue;}
+    if (dftb->element[j] <= 12) {dftb->lmax[j] = 2; PRINTF("element no. %d: at. number %d, lmax %d\n", j, dftb->element[j], dftb->lmax[j]); continue;}
+    if (dftb->element[j] <= 20) {dftb->lmax[j] = 3; PRINTF("element no. %d: at. number %d, lmax %d\n", j, dftb->element[j], dftb->lmax[j]); continue;}
+    if (dftb->element[j] <= 38) {dftb->lmax[j] = 4; PRINTF("element no. %d: at. number %d, lmax %d\n", j, dftb->element[j], dftb->lmax[j]); continue;}
+    PRINTF("SCC-DFTB: A too high atomic number detected (%d), exiting!\n\n", dftb->element[j]);
+    exit(-1);
+  }
+
+  dftb->sccmode     = qm->dftbsccmode;
+  dftb->partial_pme = qm->dftbpartialpme;
+  dftb->dispersion  = qm->dftbdispersion;
+  dftb->cdko        = qm->dftbcdko;
+  dftb->mmhub_inf   = qm->dftbmmhubinf;
+  printf("  DFTB: SCC-DFTB mode %d!\n", dftb->sccmode);
+  if (! dftb->cutoff_qmmm) {
+    if (dftb->partial_pme)
+      printf("  DFTB: PME will be done for QM/MM with QM atoms only, starting from 2nd SCC iteration (partial PME)\n");
+    else
+      printf("  DFTB: full PME will be done for QM/MM every time\n");
+  }
+  printf("  QM/MM: MM electric field scaled by a factor of %f\n", mm->scalefactor);
+  printf("  DFTB: empirical dispersion ");
+  switch (dftb->dispersion) {
+    case 0: printf("not considered\n");
+            break;
+    case 1: printf("-- with Grimme's DFT-D3\n");
+            dispersion_dftd3_init(dftb, slko_path);
+            break;
+    case 2: printf("-- with Elstner's 2001\n");
+            break;
+  }
+  printf("  DFTB: charge-dependent Klopman--Ohno QM/MM interaction %s !\n", dftb->cdko ? "USED" : "NOT USED");
+  if (dftb->cdko)
+    printf("  DFTB: for CDKO, Hubbard param. of MM atoms %s set to infinity\n", dftb->mmhub_inf ? "SET" : "NOT SET");
+
+  /* DFTB - assign the Hubbard derivatives for 3rd order */
+  if (dftb->sccmode == 3) {
+    dftb->zeta1 = 4.0; // 4.05 or 4.2
+    for (j=0; j<dftb->elements; j++)
+      switch (dftb->element[j]) {
+	case  1: dftb->uhder1[j] = -0.1857 /*-0.16*/; break; // -0.1857
+	case  6: dftb->uhder1[j] = -0.1492 /*-0.23*/; break; // -0.1492
+	case  7: dftb->uhder1[j] = -0.1535 /*-0.13*/; break; // -0.1535
+	case  8: dftb->uhder1[j] = -0.1575 /*-0.19*/; break; // -0.1575
+	case  9: dftb->uhder1[j] = -0.16; break; // no value
+	case 11: dftb->uhder1[j] = -0.05; break; // -0.0453   XXX
+	case 15: dftb->uhder1[j] = -0.14; break; // -0.0702
+	case 16: dftb->uhder1[j] = -0.14; break; // -0.0695
+	default: printf("\nNo value of Hubbard derivative available for atomic number %d!\n", dftb->element[j]);
+		 printf("Cannot perform DFTB3 calculation, exiting!\n\n");
+		 exit(-1);
+      }
+  } else { /* DFTB2 (1998) - put zeroes into the array */
+    for (j=0; j<dftb->elements; j++)
+      dftb->uhder1[j] = 0.;
+    dftb->zeta1 = 0.;
+  }
+  /* DFTB - only the electron in the outermost shell shall be taken into account
+   * thus, subtract the core electrons from phase1.nel ! */
+  for (j=0; j<dftb->atoms; j++) {
+    if (dftb->element[dftb->atomtype[j]] <= 2) continue;
+    if (dftb->element[dftb->atomtype[j]] <= 10) {dftb->phase1.nel -=  2; continue;}
+    if (dftb->element[dftb->atomtype[j]] <= 18) {dftb->phase1.nel -= 10; continue;}
+    if (dftb->element[dftb->atomtype[j]] <= 36) {dftb->phase1.nel -= 18; continue;}
+    /* otherwise */ dftb->phase1.nel -= 36;
+  }
+  printf("  DFTB: we have %d electrons!\n", dftb->phase1.nel);
+    /*
+    switch (dftb->lmax[dftb->atomtype[j]]) {
+      case 1: break;
+      case 2: if (dftb->element[dftb->atomtype[j]] <= 10) dftb->phase1.nel -=  2; else dftb->phase1.nel -= 10; break;
+      case 3: if (dftb->element[dftb->atomtype[j]] <= 18) dftb->phase1.nel -= 10; else dftb->phase1.nel -= 18; break;
+      case 4: if (dftb->element[dftb->atomtype[j]] <= 36) dftb->phase1.nel -= 18; else dftb->phase1.nel -= 36; break;
+    }*/
+
+  for (i=0; i<dftb->elements; i++) {
+    dftb->qzero1[i] = 0.0;
+    dftb->uhubb1[i] = 0.0;
+    for (j=0; j<dftb->elements; j++) {
+      PRINTF("Atomtype pair %d-%d\n", i+1, j+1);
+      /* read the tables for DFTB phase 1 - calculation of monomers */
+      strcpy(elem1, periodic_system[dftb->element[i]]);
+      strcpy(elem2, periodic_system[dftb->element[j]]);
+      if (slko_lowercase) {
+        elem1[0] += 'a' - 'A';
+        elem2[0] += 'a' - 'A';
+      }
+      sprintf(filename, "%s%s%s%s%s", slko_path, elem1, slko_separator, elem2, slko_suffix);
+      f = fopen(filename, "r");
+      if (f == NULL) {
+        PRINTF("Cannot open the parameter file %s, exiting!\n", filename);
+	exit(-1);
+      }
+      //printf("fscanf(f, \"%%lf %%d\", &(dftb->dr1[i][j]), &(dftb->dim1[i][j]))\n");
+      fscanf(f, "%lf %d", &(dftb->dr1[i][j]), &(dftb->dim1[i][j]));
+      if (i == j) {
+        //printf("&(dftb->skself1[i][0]), &(dftb->skself1[i][1]), &(dftb->skself1[i][2]), &espin\n");
+        fscanf(f, "%lf %lf %lf %lf %lf %lf %lf %lf %lf %lf",
+                  &(dftb->skself1[i][0]), &(dftb->skself1[i][1]), &(dftb->skself1[i][2]), &espin,
+                  uhubbh+2, uhubbh+1, uhubbh, qzeroh+2, qzeroh+1, qzeroh);
+        dftb->uhubb1[i] = uhubbh[0];
+        for (k=0; k<3; k++)
+          dftb->qzero1[i] += qzeroh[k];
+      }
+      /* Slater-Koster tables */
+      snew(dftb->skhtab1[i][j], dftb->dim1[i][j]);
+      snew(dftb->skstab1[i][j], dftb->dim1[i][j]);
+      for (k=0; k<dftb->dim1[i][j]; k++) {
+        //printf("for (l=0; l<10; l++) fscanf(f, \"%%lf\", &(dftb->skhtab1[i][j][k][l]))\n");
+        for (l=0; l<10; l++) fscanf(f, "%lf", &(dftb->skhtab1[i][j][k][l]));
+        //printf("for (l=0; l<10; l++) fscanf(f, \"%%lf\", &(dftb->skstab1[i][j][k][l]))\n");
+        for (l=0; l<10; l++) fscanf(f, "%lf", &(dftb->skstab1[i][j][k][l]));
+      }
+      /* fit parameters for analytical DFTB -- like DFTBA in Gaussian */
+      // qm_dftb_slko_levmar(dftb, i, j);
+      /* repulsive energy - splines */
+      do {
+        fscanf(f, "%s\n", charstring);
+      } while (strcmp(charstring, "Spline"));
+      fscanf(f, "%d %lf", &(dftb->numint[i][j]), &(dftb->cutoff[i][j]));
+      fscanf(f, "%lf %lf %lf", dftb->efkt[i][j], dftb->efkt[i][j]+1, dftb->efkt[i][j]+2);
+      /* allocate the array */
+      snew(dftb->coeff[i][j], dftb->numint[i][j]);
+      snew(dftb->xr[i][j], dftb->numint[i][j]);
+      for (k=0; k<dftb->numint[i][j]-1; k++)
+        fscanf(f, "%lf %lf %lf %lf %lf %lf", dftb->xr[i][j][k], dftb->xr[i][j][k]+1, dftb->coeff[i][j][k], dftb->coeff[i][j][k]+1, dftb->coeff[i][j][k]+2, dftb->coeff[i][j][k]+3);
+      k = dftb->numint[i][j]-1;
+      fscanf(f, "%lf %lf %lf %lf %lf %lf %lf %lf", dftb->xr[i][j][k], dftb->xr[i][j][k]+1, dftb->coeff[i][j][k], dftb->coeff[i][j][k]+1, dftb->coeff[i][j][k]+2, dftb->coeff[i][j][k]+3, dftb->coeff[i][j][k]+4, dftb->coeff[i][j][k]+5);
+      if (SQR(dftb->xr[i][j][k][1] - dftb->cutoff[i][j]) > 1.e6) {
+        PRINTF("\nError in data file %s:\n  xr[i][j][k][1] != cutoff[i][j] (%f != %f)\n  Exiting!\n\n", filename, dftb->xr[i][j][k][1], dftb->cutoff[i][j]);
+        exit(-1);
+      }
+      /* file has been read */
+      PRINTF("skfile for pair %d-%d: %s\n", i+1, j+1, filename);
+      fclose(f);
+    }
+  }
+
+  /* prepare the broyden structures */
+  snew(dftb->broyden, 1);
+  snew(dftb->broyden->f, dftb->atoms);
+  snew(dftb->broyden->ui, dftb->atoms);
+  snew(dftb->broyden->vti, dftb->atoms);
+  snew(dftb->broyden->t1, dftb->atoms);
+  snew(dftb->broyden->dumvi, dftb->atoms);
+  snew(dftb->broyden->df, dftb->atoms);
+  snew(dftb->broyden->vector, dftb->atoms);
+  snew(dftb->broyden->unit31, dftb->atoms);
+  snew(dftb->broyden->unit32, dftb->atoms);
+
+  /* int arrays */
+  snew(dftb->phase1.izp, dftb->atoms);
+  snew(dftb->phase1.izpxh, dftb->atoms);
+  snew(dftb->phase1.ind, dftb->atoms + 1);
+  dftb->phase1.ind[0] = 0;
+  for (j=0; j<dftb->atoms; j++) {
+    dftb->phase1.izp[j] = dftb->atomtype[j];
+    if (dftb->sccmode == 3 && dftb->element[dftb->atomtype[j]] == 1) /* hydrogen requires a modified gamma function */
+      dftb->phase1.izpxh[j] = 1;
+    else
+      dftb->phase1.izpxh[j] = 0;
+    izpj = dftb->phase1.izp[j];
+    dftb->phase1.ind[j+1] = dftb->phase1.ind[j] + dftb->lmax[izpj]* dftb->lmax[izpj];
+    //printf("ind[%d] = %d\n", j+1, dftb->phase1.ind[j+1]);
+  }
+  dftb->phase1.ndim = dftb->phase1.norb = dftb->phase1.ind[dftb->atoms];
+  //printf("phase1.ndim = %d\n", dftb->phase1.ndim);
+  dftb->phase1.ne = dftb->extcharges;
+
+  /* double arrays */
+  snew(dftb->phase1.x, dftb->atoms);
+  snew(dftb->phase1.grad, dftb->atoms);
+  snew(dftb->phase1.partgrad, dftb->atoms);
+  snew(dftb->phase1.xe, dftb->extcharges);
+  snew(dftb->phase1.mmgrad, dftb->extcharges);
+  snew(dftb->phase1.ze, dftb->extcharges);
+  snew(dftb->phase1.qmat, dftb->atoms);
+  // qmat can is initialized right away!
+  for (j=0; j<dftb->atoms; j++)
+    dftb->phase1.qmat[j] = dftb->qzero1[dftb->phase1.izp[j]];
+  snew(dftb->phase1.qmold, dftb->atoms);
+  snew(dftb->phase1.qmulli, dftb->phase1.norb);
+  snew(dftb->phase1.ev, dftb->phase1.norb);
+  snew(dftb->phase1.occ, dftb->phase1.norb);
+  snew(dftb->phase1.a, dftb->phase1.norb);
+    snew(dftb->phase1.a[0], SQR(dftb->phase1.norb));
+    for(j = 1; j < dftb->phase1.norb; j++)
+      dftb->phase1.a[j] = dftb->phase1.a[0] + j * dftb->phase1.norb;
+  snew(dftb->phase1.b, dftb->phase1.norb);
+    snew(dftb->phase1.b[0], SQR(dftb->phase1.norb));
+    for(j = 1; j < dftb->phase1.norb; j++)
+      dftb->phase1.b[j] = dftb->phase1.b[0] + j * dftb->phase1.norb;
+  snew(dftb->phase1.a_trans, dftb->phase1.norb * dftb->phase1.norb);
+  snew(dftb->phase1.b_trans, dftb->phase1.norb * dftb->phase1.norb);
+  snew(dftb->phase1.hamil, dftb->phase1.norb);
+    snew(dftb->phase1.hamil[0], SQR(dftb->phase1.norb));
+    for(j = 1; j < dftb->phase1.norb; j++)
+      dftb->phase1.hamil[j] = dftb->phase1.hamil[0] + j * dftb->phase1.norb;
+  snew(dftb->phase1.overl, dftb->phase1.norb);
+    snew(dftb->phase1.overl[0], SQR(dftb->phase1.norb));
+    for(j = 1; j < dftb->phase1.norb; j++)
+      dftb->phase1.overl[j] = dftb->phase1.overl[0] + j * dftb->phase1.norb;
+  snew(dftb->phase1.gammamat, dftb->atoms);
+    snew(dftb->phase1.gammamat[0], SQR(dftb->atoms));
+    for(j = 1; j < dftb->atoms; j++)
+      dftb->phase1.gammamat[j] = dftb->phase1.gammamat[0] + j * dftb->atoms;
+  snew(dftb->phase1.gammader, dftb->atoms);
+    snew(dftb->phase1.gammader[0], SQR(dftb->atoms));
+    for(j = 1; j < dftb->atoms; j++)
+      dftb->phase1.gammader[j] = dftb->phase1.gammader[0] + j * dftb->atoms;
+  snew(dftb->phase1.shift, dftb->atoms);
+  snew(dftb->phase1.shift3, dftb->atoms);
+  snew(dftb->phase1.shift3a, dftb->atoms);
+  snew(dftb->phase1.shiftE, dftb->atoms);
+  snew(dftb->phase1.shiftE2, dftb->atoms);
+  snew(dftb->phase1.aux, 1 + 6 * dftb->phase1.norb + 2 * SQR(dftb->phase1.norb));
+  snew(dftb->phase1.iaux, 3 + 5 * dftb->phase1.norb);
+  snew(dftb->phase1.pot, dftb->atoms + dftb->extcharges);
+  snew(dftb->phase1.pot2, dftb->atoms + dftb->extcharges);
+  snew(dftb->phase1.pot3, dftb->atoms + dftb->extcharges);
+  snew(dftb->phase1.pot4, dftb->atoms + dftb->extcharges);
+  snew(dftb->phase1.pot5, dftb->atoms);
+  snew(dftb->phase1.pot6, dftb->atoms);
+  snew(dftb->phase1.pot7, dftb->atoms);
+
+  snew(dftb->phase1.neighbors_pme, dftb->atoms);
+  snew(dftb->phase1.neighbor_pme, dftb->atoms);
+
+  /* rvec and real (and nrnb) arrays */
+  snew(dftb->phase1.x_pme, dftb->atoms + dftb->extcharges);
+  snew(dftb->phase1.f_pme, dftb->atoms + dftb->extcharges);
+  snew(dftb->phase1.q_pme, dftb->atoms + dftb->extcharges);
+  snew(dftb->phase1.nrnb_pme, 1);
+
+  /* dvec array */
+  snew(dftb->phase1.gamma_deriv, dftb->atoms);
+    snew(dftb->phase1.gamma_deriv[0], SQR(dftb->atoms));
+    for(j = 1; j < dftb->atoms; j++)
+      dftb->phase1.gamma_deriv[j] = dftb->phase1.gamma_deriv[0] + j * dftb->atoms;
+
+  /* machine accuracy */
+  dftb->racc = 1.0;
+  while ((1.0 + dftb->racc) > 1.0)
+    dftb->racc /= 2.0;
+  dftb->racc *= 2.0;
+  dftb->dacc = 4 * dftb->racc;
+
+  /* charge-dependent Klopman--Ohno QM-MM interaction */
+  if (qm->dftbcdko) {
+    int *foundtype_dftb, current_atomno;
+    double alpha_read, beta_read;
+    if ((env = getenv("GMX_DFTB_CDKO_PATH")) != NULL) {
+      strcpy(cdko_path, env);
+    } else {
+      strcpy(cdko_path, "./cdko.par");
+    }
+    PRINTF("QM/MM SCC-DFTB: charge-dependent Klopman--Ohno interaction between QM and MM requested\n");
+    PRINTF("                the element-specific parameters alpha and beta will be read in from file\n");
+    PRINTF("                %s (can be overriden by setting the env. variable GMX_DFTB_CDKO_PATH)\n", cdko_path);
+    PRINTF("                format: atomic-number alpha beta, one element per line\n");
+    // read it in here
+    // ...
+    // end read in
+    snew(foundtype_dftb, dftb->elements);
+    f = fopen(cdko_path, "r");
+    if (f == NULL) {
+      PRINTF("\n   Error: file %s cannot be read\n   Exiting!\n\n", cdko_path);
+      exit(-1);
+    }
+    while (!feof(f)) {
+      fscanf(f, "%d %lf %lf", &current_atomno, &alpha_read, &beta_read);
+      for (i=0; i<dftb->elements; i++)
+        if (current_atomno == dftb->atomtype[i]) {
+	  foundtype_dftb[i] = 1;
+	  dftb->alpha1[i] = alpha_read;
+	  dftb->beta1[i]  = beta_read;
+	  break;
+	}
+    }
+    fclose(f);
+    for (i=0; i<dftb->elements; i++)
+      if (!foundtype_dftb[i]) {
+        PRINTF("   Error: no CDKO parameters found for DFTB type %d (element %s), and possibly others)\n   Exiting!\n\n",
+	  i, periodic_system[dftb->element[i]]);
+	exit(-1);
+      }
+    snew(dftb->phase1.partmmgrad, dftb->extcharges);
+  }
+
+  /* dispersion */
+  switch (dftb->dispersion) {
+    case 0: PRINTF("No empirical dispersion energy.\n");
+            break;
+    case 1: PRINTF("Dispersion energy considered with Grimme's D3 method.\n");
+            break;
+    case 2: PRINTF("Dispersion energy according to Elstner 2001 not implemented yet!\nExiting...\n\n");
+            exit(-1);
+            break;
+  }
+
+  return;
+}
+
+/******************************************
+ * PREREQUISITES FOR A QM/MM CALCULATION  *
+ *    TO BE PERFORMED IN EVERY MD STEP    *
+ ******************************************/
+
+// void prepare_charge_transfer(t_QMrec *qm, t_MMrec *mm, t_state *state, t_mdatoms *mdatoms)
+/*
+ * Refresh the per-step input of the DFTB calculation from the QM/MM records.
+ *
+ * On every call this function
+ *   1. aborts if qm->nrQMatoms differs from dftb->atoms;
+ *   2. copies the QM coordinates qm->xQM into dftb->phase1.x, multiplied by
+ *      NM_TO_BOHR;
+ *   3. stores in dftb->phase1.com the mass-weighted sum of these coordinates
+ *      multiplied by dftb->phase1.inv_tot_mass (phase1.mass and
+ *      phase1.inv_tot_mass are only read here, never set);
+ *   4. sets dftb->extcharges and dftb->phase1.ne to mm->nrMMatoms and prints
+ *      that number to stdout;
+ *   5. copies the MM coordinates mm->xMM (multiplied by NM_TO_BOHR) into
+ *      dftb->phase1.xe and the MM charges mm->MMcharges into dftb->phase1.ze.
+ * The body comments call the multiplication by NM_TO_BOHR a conversion from
+ * nanometer to bohr; the constant itself is defined elsewhere.
+ *
+ * Parameters:
+ *   qm - QM record; qm->dftb, qm->nrQMatoms and qm->xQM are read.
+ *   mm - MM record; mm->nrMMatoms, mm->xMM and mm->MMcharges are read.
+ *
+ * Nothing is returned.  On an atom-count mismatch a message is printed to
+ * stdout and the process is terminated with exit(-1).
+ *
+ * NOTE(review): no array is (re)allocated here.  Confirm that phase1.xe and
+ * phase1.ze can hold mm->nrMMatoms entries should that number grow between
+ * calls; otherwise the copy loop at the end writes past their end.
+ */
+void prepare_dftb(t_QMrec *qm, t_MMrec *mm)
+{
+  dftb_t *dftb;
+  int j, k;
+  dvec com, masscoord;
+
+  dftb = qm->dftb;
+
+  /* check the number of QM atoms */
+  if (qm->nrQMatoms != dftb->atoms) {
+    printf("\nThe number of QM atoms has changed (was %d, is %d),\n  exiting!\n\n", dftb->atoms, qm->nrQMatoms);
+    exit(-1);
+  }
+
+  /* read coordinates of the quantum system */
+  /* conversion from nanometer to bohr */
+  for (j=0; j<dftb->atoms; j++)
+    for (k=0; k<3; k++)
+      dftb->phase1.x[j][k] = (double) qm->xQM[j][k] * NM_TO_BOHR;
+
+  /* debug aid - uncomment (and declare "char c") to write out the QM coordinates;
+   * the mapping of atom types to element letters is only an example:
+  printf("%d\ntest coordinates from qm->xQM\n", dftb->phase1.nn);
+  for (j=0; j<dftb->phase1.nn; j++) {
+    switch (dftb->phase1.izp[j]) {
+      case 0: c = 'C'; break;
+      case 1: c = 'H'; break;
+      case 2: c = 'O'; break;
+      case 3: c = 'N'; break;
+    }
+    printf("%c %12.7f%12.7f%12.7f\n", c, dftb->phase1.x[j][0]*10/NM_TO_BOHR, dftb->phase1.x[j][1]*10/NM_TO_BOHR, dftb->phase1.x[j][2]*10/NM_TO_BOHR);
+  }
+  */
+
+  /* get the center of mass: accumulate mass * position over the QM atoms ... */
+  clear_dvec(com);
+  for (j=0; j<dftb->atoms; j++) {
+    /* phase1.x[j] is handed over directly, no local copy of it is needed */
+    dsvmul(dftb->phase1.mass[j], dftb->phase1.x[j], masscoord);
+    dvec_inc(com, masscoord);
+  }
+  /* ... then scale the accumulated sum (com, not the last masscoord) by inv_tot_mass */
+  dsvmul(dftb->phase1.inv_tot_mass, com, dftb->phase1.com);
+
+  /* deal with the EXTERNAL CHARGES */
+  /* update the number of external charges */
+  dftb->extcharges = mm->nrMMatoms;
+  dftb->phase1.ne = mm->nrMMatoms;
+  printf("\nThe number of MM atoms is %d\n", mm->nrMMatoms);
+
+  /* read coordinates and magnitudes of the external charges */
+  /* the positions are taken from mm->xMM as they are: no shift to the nearest
+   * periodic image is applied in this function */
+  for (j=0; j<dftb->extcharges; j++) {
+    /* coordinates, nanometer to bohr */
+    for (k=0; k<DIM; k++)
+      dftb->phase1.xe[j][k] = (double) NM_TO_BOHR * mm->xMM[j][k];
+    /* magnitude of the charge */
+    dftb->phase1.ze[j] = mm->MMcharges[j];
+  }
+
+  /* debug aid - uncomment to write out the external charges:
+  printf("%d extcharges\n", dftb->extcharges);
+  for (j=0; j<dftb->extcharges; j++)
+    printf("%12.7f%12.7f%12.7f%12.7f\n", dftb->phase1.xe[j][XX] / NM_TO_BOHR * 10, dftb->phase1.xe[j][YY]  / NM_TO_BOHR * 10, dftb->phase1.xe[j][ZZ] / NM_TO_BOHR * 10, dftb->phase1.ze[j]);
+  printf("end extcharges\n");
+  */
+
+  /* debug aid - uncomment (and declare "double sum") to check the sum of extcharges:
+  printf("Sum of extcharges:");
+  sum = 0.0;
+  for (j=0; j<dftb->extcharges; j++)
+    sum += dftb->phase1.ze[j];
+  printf("%12.7f\n", sum);
+  */
+
+  return;
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_cdko.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_cdko.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_cdko.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_cdko.c	2014-01-09 13:32:57.000000000 +0100
@@ -0,0 +1,271 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+#include"qm_dftb.h"
+
+// missing in include/vec.h somehow...
+/* Subtract b from a in place; every element of a and b is read before a is written. */
+static gmx_inline void dvec_dec(dvec a,const dvec b)
+{
+  dvec diff;
+  int  d;
+
+  for (d=0; d<DIM; d++)
+    diff[d] = a[d] - b[d];
+
+  for (d=0; d<DIM; d++)
+    a[d] = diff[d];
+}
+
+// adopted from src/gmxlib/pbc.c
+/* Difference dx = x1 - x2 with every component shifted by whole box lengths into
+ * [-L/2, L/2].  Only the diagonal box elements are used, each multiplied by NM_TO_BOHR
+ * (so x1 and x2 are presumably in bohr - confirm); off-diagonal elements are ignored. */
+static gmx_inline void pbc_dx_dftb(matrix box, const dvec x1, const dvec x2, dvec dx)
+{
+    int i;
+    double length, half;
+
+    for(i=0; i<DIM; i++) {
+        dx[i] = x1[i] - x2[i];
+        length = (double) box[i][i] * NM_TO_BOHR;
+        /* with length <= 0 the shifts below could never terminate: leave dx[i] unwrapped */
+        if (length <= 0.) continue;
+        half = length / 2.;
+        while (dx[i] > half)  dx[i] -= length;
+        while (dx[i] < -half) dx[i] += length;
+    }
+}
+
+/* OLD VERSION WITH QM/MM NEIGHBORLIST, UNFINISHED!!!
+ * DOUBLE-CHECK BEFORE USING!!!
+ *
+void cdkopotential(dftb_t *dftb, t_nblist *QMMMlist, int *indexMM, dvec *x, dvec *xe)
+{
+  dftb_phase1_t dftb1 = dftb->phase1;
+  int i, j, k, l, izpj, izpk;
+  dvec bond;
+  double alpha, beta, exptmp, gamma, uhder, uhub, uhubre, uhubre2, mmuhubre;
+
+  // do it for every QM atom
+  for (j=0; j<nn; j++) { 
+    dftb1.shiftE2[j] = 0.;
+    // some numbers only specific to the QM atom
+    alpha    = dftb->alpha1[dftb1.izp[j]];
+    beta     = dftb->beta1[dftb1.izp[j]];
+    uhder    = dftb->uhder1[dftb1.izp[j]];
+    uhub     = dftb->uhubb1[dftb1.izp[j]] - uhder * QM_CHARGE(j);
+    uhubre   = 1. / uhub;
+    uhubre2  = SQR(uhubre);
+    // add the correction only for MM atoms in the neighbor list!
+    for (k=QMMMlist->jindex[j]; k<QMMMlist->jindex[j+1]; k++) {
+      // obtain the index l of the MM atom
+      for (l=0; ; l++)
+        if (indexMM[l] == QMMMlist->jjnr[k]) {
+          break;
+        }
+      dvec_sub(dftb1.x[j], dftb1.xe[l], bond);
+      if (dnorm(bond) < 0.001) {
+        // this may occur on the first step of simulation for link atom(s)
+        // just skip this
+      } else {
+        // delete the 1/R contribution ...
+        dftb1.pot5[j] -= dftb1.ze[l] / dnorm(bond);
+        // ... and calculate the KO contribution here
+        exptmp  = exp(-beta * dnorm(bond));
+        // Guanhua: add MM contribution
+        if (! dftb->mmhub_inf) {
+          mmuhubre = 1. / dftb->uhubb1[dftb->mm_element[l]];
+        } else {
+          mmuhubre = 0.;
+        }
+        gamma = 1. / sqrt(dnorm2(bond) + alpha * SQR(uhubre+mmuhubre) * exptmp);
+        dftb1.pot5[j] += gamma * dftb1.ze[l];
+        dftb1.shiftE2[j] += dftb1.ze[l] * CUB(gamma) * (-QM_CHARGE(j)) * uhubre2 * uhder * (uhubre+mmuhubre) * alpha * exptmp;
+        //printf("Ewald-SR for QM=%d, MM=%d: contrib = %f\n", j+1, l+1, dftb1.ze[l] / dnorm(bond) * gmx_erfc(fr->ewaldcoeff * dnorm(bond) / NM_TO_BOHR));
+      }
+    }
+  }
+}
+*/
+
+/*
+ * Switch the QM-MM interaction in pot5 from 1/R to the charge-dependent
+ * Klopman--Ohno (KO) form and rebuild shiftE2.
+ *
+ * For every QM atom j, shiftE2[j] is reset to zero; then, for every MM atom
+ * listed in neighbor_pme[j] at a distance R of at least 0.001, ze/R is taken
+ * out of pot5[j], ko*ze is added, and the uhder-dependent term is accumulated
+ * in shiftE2[j].  Pairs closer than that are skipped.
+ *
+ * dftb  DFTB data; arrays reached through dftb->phase1 are modified.
+ * box   box matrix handed on to pbc_dx_dftb().
+ * NOTE(review): QM_CHARGE() is not defined in this file and is given only the
+ * atom index, so it presumably refers to "dftb"/"dftb1" by name - keep them.
+ */
+void cdkopotential(dftb_t *dftb, matrix box)
+{
+  /* by-value copy of the phase1 record; writes below go through its pointer members */
+  dftb_phase1_t dftb1 = dftb->phase1;
+  int j, m, mm_at;
+  dvec sep;
+  double alpha, beta, uhder, uhub, uhubre, uhubre2, mmuhubre, dist, damp, ko;
+
+  for (j=0; j<dftb1.nn; j++) {
+    dftb1.shiftE2[j] = 0.;
+    /* quantities that depend on the QM atom only */
+    alpha   = dftb->alpha1[dftb1.izp[j]];
+    beta    = dftb->beta1[dftb1.izp[j]];
+    uhder   = dftb->uhder1[dftb1.izp[j]];
+    uhub    = dftb->uhubb1[dftb1.izp[j]] - uhder * QM_CHARGE(j);
+    uhubre  = 1. / uhub;
+    uhubre2 = SQR(uhubre);
+    /* only the MM atoms in the neighbor list of atom j are corrected */
+    for (m=0; m<dftb1.neighbors_pme[j]; m++) {
+      mm_at = dftb1.neighbor_pme[j][m];
+      pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[mm_at], sep);
+      dist = dnorm(sep);
+      /* coinciding sites (may occur on the first step for link atoms) are skipped */
+      if (dist < 0.001) continue;
+      damp = exp(-beta * dist);
+      mmuhubre = dftb->mmhub_inf ? 0. : 1. / dftb->uhubb1[dftb->mm_element[mm_at]]; /* Guanhua: MM contribution */
+      ko = 1. / sqrt(SQR(dist) + alpha * SQR(uhubre+mmuhubre) * damp);
+      /* delete the 1/R contribution and put the KO one in its place */
+      dftb1.pot5[j] -= dftb1.ze[mm_at] / dist;
+      dftb1.pot5[j] += ko * dftb1.ze[mm_at];
+      dftb1.shiftE2[j] += dftb1.ze[mm_at] * CUB(ko) * (-QM_CHARGE(j)) * uhubre2 * uhder * (uhubre+mmuhubre) * alpha * damp;
+    }
+  }
+}
+
+/* OLD VERSION WITH QM/MM NEIGHBORLIST, UNFINISHED!!!
+ * DOUBLE-CHECK BEFORE USING!!!
+ *
+void cdkograd(dftb_t *dftb, t_nblist *QMMMlist, int *indexMM, dvec *x, dvec *xe, dvec *grad, dvec *mmgrad)
+{
+  int i, j, k, izpj, izpk;
+  double r, erep, grdr, xh;
+  dvec bond, tmpgrad;
+  double alpha, beta, gamma, uhder, uhub, uhubre, uhubre2, mmuhubre, factor;
+
+  // do it for every QM atom
+  for (j=0; j<nn; j++) {
+
+    // some numbers only specific to the QM atom
+    alpha    = dftb->alpha1[dftb1.izp[j]];
+    beta     = dftb->beta1[dftb1.izp[j]];
+    uhder    = dftb->uhder1[dftb1.izp[j]];
+    uhub     = dftb->uhubb1[dftb1.izp[j]] - uhder * QM_CHARGE(j);
+    uhubre   = 1. / uhub;
+    uhubre2  = SQR(uhubre);
+
+    // add the correction only for MM atoms in the neighbor list!
+    for (k=QMMMlist->jindex[j]; k<QMMMlist->jindex[j+1]; k++) {
+      // obtain the index l of the MM atom
+      for (l=0; ; l++)
+        if (indexMM[l] == QMMMlist->jjnr[k]) {
+          break;
+        }
+      dvec_sub(dftb1.x[j], dftb1.xe[l], bond);
+      if (dnorm(bond) < 0.001) {
+        // this may occur on the first step of simulation for link atom(s)
+        // just skip this
+      } else {
+        if (! dftb->mmhub_inf) {
+          mmuhubre = 1. / dftb->uhubb1[dftb->mm_element[l]];
+        } else {
+          mmuhubre = 0.;
+        }
+        // exptmp  = exp(-beta * dnorm(bond));
+        // factor =  -QM_CHARGE(j) * ZE(k) * (1. - alpha * beta / 2. / dnorm(bond) * SQR(uhubre+mmuhubre) * exptmp - 1.) / CUB(dnorm(bond))
+        // SIMPLIFIED TO:
+        factor =  QM_CHARGE(j) * dftb1.ze[l] * alpha * beta / 2. * SQR(uhubre+mmuhubre) * exp(-beta * dnorm(bond)) / SQR(dnorm2(bond));
+        // CHARMM original slightly edited...
+//        dgr = bond(1) / CUB(dnorm(bond)) *qcharge*ZE(k)*(1.0d0-kaltmp*kbetmp/2.0d0/dnorm(bond)*(uhubre+mmuhubre)**2 *exptmp)
+//            - bond(1) / CUB(dnorm(bond)) *      (qmat(j)-qzeroscc(izpj))*ZE(k)
+//        gr(1,j) = gr(1,j) + dgr
+//
+//        dgr = bond(2) / CUB(dnorm(bond)) *qcharge*ZE(k)*(1.0d0-kaltmp*kbetmp/2.0d0/dnorm(bond)*(uhubre+mmuhubre)**2 *exptmp)
+//            - bond(2) / CUB(dnorm(bond)) *      (qmat(j)-qzeroscc(izpj))*ZE(k)
+//        gr(2,j) = gr(2,j) + dgr
+//
+//        dgr = bond(3) / CUB(dnorm(bond)) *qcharge*ZE(k)*(1.0d0-kaltmp*kbetmp/2.0d0/dnorm(bond)*(uhubre+mmuhubre)**2 *exptmp)
+//            - bond(3) / CUB(dnorm(bond)) *      (qmat(j)-qzeroscc(izpj))*ZE(k)
+//        gr(3,j) = gr(3,j) + dgr
+        // so, calculate just vec(dgr) = vec(bond) * factor
+        dsvmul(factor, bond, dgr);
+        // add the contribution to QM gradient
+        dvec_inc(dftb1.grad[j], dgr); // is the sign correct?
+        // contribution to MM gradient
+        dvec_dec(dftb1.mmgrad[l], dgr); // is the sign correct?
+      }
+    } // k
+
+}
+*/
+
+/*
+ * Add the gradient of the charge-dependent Klopman--Ohno QM-MM correction.
+ *
+ * For every QM atom j and every MM atom l listed in neighbor_pme[j] whose
+ * distance R is at least 0.001 (closer pairs are skipped), the vector
+ *     dgr = factor * bond
+ * is added to dftb->phase1.grad[j] and subtracted from dftb->phase1.mmgrad[l],
+ * where bond = x[j] - xe[l] as wrapped by pbc_dx_dftb(), R = |bond| and
+ *     factor = QM_CHARGE(j) * ze[l] * alpha * beta / 2 * (uhubre + mmuhubre)^2
+ *              * exp(-beta * R) / QRT(R).
+ * This is the simplification of the expression adapted from CHARMM,
+ *     -QM_CHARGE(j) * ze[l] * (1 - alpha*beta/2/R * (uhubre+mmuhubre)^2
+ *                              * exp(-beta*R) - 1) / CUB(R),
+ * which holds if QRT(R) is the fourth power of R (defined elsewhere - confirm).
+ *
+ * Parameters:
+ *   dftb   - DFTB data; gradients are accumulated through dftb->phase1.
+ *   box    - box matrix handed on to pbc_dx_dftb().
+ *   grad   - not used by this function.
+ *   mmgrad - not used by this function.
+ *
+ * NOTE(review): grad and mmgrad are ignored and phase1.grad / phase1.mmgrad
+ * are written instead; confirm against the caller that this is intended.
+ * NOTE(review): QM_CHARGE() is not defined in this file and is given only the
+ * atom index, so it presumably refers to "dftb"/"dftb1" by name - keep them.
+ */
+void cdkograd(dftb_t *dftb, matrix box, dvec *grad, dvec *mmgrad)
+{
+  /* by-value copy of the phase1 record; writes below go through its pointer members */
+  dftb_phase1_t dftb1 = dftb->phase1;
+  int j, k, l, nn;
+  dvec bond, dgr;
+  double alpha, beta, uhder, uhub, uhubre, mmuhubre, factor, dbondnorm;
+
+  nn = dftb1.nn;
+
+  // do it for every QM atom
+  for (j=0; j<nn; j++) {
+    // some numbers only specific to the QM atom
+    alpha    = dftb->alpha1[dftb1.izp[j]];
+    beta     = dftb->beta1[dftb1.izp[j]];
+    uhder    = dftb->uhder1[dftb1.izp[j]];
+    uhub     = dftb->uhubb1[dftb1.izp[j]] - uhder * QM_CHARGE(j);
+    uhubre   = 1. / uhub;
+
+    // add the correction only for MM atoms in the neighbor list!
+    for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+      l = dftb1.neighbor_pme[j][k];
+      pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+      dbondnorm = dnorm(bond);
+      // this may occur on the first step of simulation for link atom(s) - just skip it
+      if (dbondnorm < 0.001) continue;
+      // 1/uhubb1 of the MM atom's element, or zero if mmhub_inf is set
+      mmuhubre = dftb->mmhub_inf ? 0. : 1. / dftb->uhubb1[dftb->mm_element[l]];
+      factor =  QM_CHARGE(j) * dftb1.ze[l] * alpha * beta / 2. * SQR(uhubre+mmuhubre) * exp(-beta * dbondnorm) / QRT(dbondnorm);
+      // so, calculate just vec(dgr) = vec(bond) * factor
+      dsvmul(factor, bond, dgr);
+      // add the contribution to QM gradient
+      dvec_inc(dftb1.grad[j], dgr); // is the sign correct?
+      // contribution to MM gradient
+      dvec_dec(dftb1.mmgrad[l], dgr); // is the sign correct?
+    } // k
+  } // j
+}
+
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_declarations.h gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_declarations.h
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_declarations.h	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_declarations.h	2015-02-10 12:23:50.165211094 +0100
@@ -0,0 +1,112 @@
+
+void init_dftb(t_QMrec *qm, t_MMrec *mm, t_inputrec *ir); /* NOTE(review): presumably the set-up routine (parameter files, allocation) - confirm */
+/* copies the QM/MM coordinates and the MM charges into dftb->phase1; exits if the number of QM atoms has changed */
+void prepare_dftb(t_QMrec *qm, t_MMrec *mm);
+/* NOTE(review): not defined in the code reviewed here; presumably runs the DFTB calculation and returns its energy - confirm */
+double run_dftb1(dftb_t *dftb, rvec f[], rvec fshift[], t_commrec *cr, t_forcerec *fr, matrix box);
+
+// broyden.c
+  void broyden(int niter, double alpha, int jtop, double *vecin, double *vecout, dftb_broyden_t *arrays);
+// cdko.c (file qm_dftb_cdko.c): charge-dependent Klopman--Ohno QM-MM interaction, potential and gradient
+  void cdkopotential(dftb_t *dftb, matrix box);
+  void cdkograd(dftb_t *dftb, matrix box, dvec *grad, dvec *mmgrad); // grad and mmgrad are not used by the current implementation
+// dispersive*.c -- NOTE(review): dispersive() defined in qm_dftb_dispersive.c has no prototype in this header
+  int dispersion_dftd3_init(dftb_t *dftb, char *slko_path);
+  double dispersion_dftd3(dftb_t *dftb, dvec *g);
+// fermi.c
+  void fermi(int ndim, double *ev, double *occ, double *efermi, int nelectrons, double telec);
+// gammamat.c
+  void gammamatrix(int nat, dvec *rat, double **gammamat, double uhubb[DFTB_MAXTYPES], int *izp);
+  void gammamatrix1(int nat, dvec *rat, int *izp, double uhubb[DFTB_MAXTYPES], dvec **gamma_deriv);
+  double gam12(double r, double uhub1, double uhub2);
+  double gam121(double r, double uhub1, double uhub2);
+  double gamsub(double a, double b, double r, double rrc);
+  double gamsubder(double a, double b, double r, double rrc);
+// gradient.c
+  void usual_gradient(dftb_t *dftb, dvec *x, dvec *grad);
+  void gamma_gradient(dftb_t *dftb, dvec *x, dvec *grad);
+// mulliken.c
+  void mulliken(int nn, double *qmat, double *qmulli, double *qtot, int ndim,
+                double *occ, double **a, double **overl, int *ind,
+                int lmax[DFTB_MAXTYPES], int *izp);
+// neighborlist.c
+  void do_neighborlist_for_dftb(dftb_t *dftb, matrix box);
+// output.c (both functions receive the dftb_phase1_t record by value)
+  void outeigenvectors(double **a, double *ev, int *ind, int nn, dftb_phase1_t dftb1);
+  void outspec(int nn, int ndim, int *ind, double *ev, double *occ,
+               double efermi, double *qmat, double *qmulli, dftb_t *dftb, dftb_phase1_t dftb1);
+// repulsive.c
+  double repulsive(dftb_t *dftb, dvec *x, dvec *grad);
+// skpar.c: skspar() and skhpar() share one parameter list and return type; it is the type of the "iovpar" callbacks below
+  int skspar(int i, int j, double r2, double dd[13],
+             int lmax[DFTB_MAXTYPES], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3],
+             int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES]);
+  int skhpar(int i, int j, double r2, double dd[13],
+             int lmax[DFTB_MAXTYPES], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3],
+             int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES]);
+  double cubicspline(double f0, double f1, double f2, double x0, double x1,
+       double xh, double hl, double dr);
+  double spline5th(double f0, double f1, double f2, double x0, double x1, double x2,
+       double xh, double dr, int mxind);
+// slkode.c: the iovpar argument of slkode() has the return type and parameter list of skspar()/skhpar() above
+  void slkmatrices(int i, int j, double (*xat)[3], double ham[LDIM][LDIM], double over[LDIM][LDIM],
+                 int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+                 int *izp, tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3]);
+  void slkode(double dum[3], int i, int j, double em[LDIM][LDIM], int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+               int (*iovpar)(int, int, double, double [13], int [DFTB_MAXTYPES],
+                 tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+                 int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+               tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3]);
+// slktrafo.c: every function below takes the same iovpar callback type as slkode(); sksp, sksd and skpd additionally take emt
+  void skss(double x[6], double x2[6], int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double[13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+                 int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3]);
+  void sksp(double x[6], double x2[6], int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double[13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+                 int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], double emt[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3]);
+  void sksd(double x[6], double x2[6], int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double[13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+                 int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], double emt[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3]);
+  void skpp(double x[6], double x2[6], int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double[13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+                 int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3]);
+  void skpd(double x[6], double x2[6], int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double[13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+                 int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], double emt[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3]);
+  void skdd(double x[6], double x2[6], int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double[13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+                 int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3]);
+  void selfs(int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double[13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+                 int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3]);
+  void selfp(int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double[13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+                 int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3]);
+  void selfd(int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double[13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+                 int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3]);
+
+/* new functions with DFTB3 */
+// qm_dftb_gamma.c -- NOTE(review): the trailing pointer arguments (gval/gder, dcdr/dcdr3, hgrad, gammamat/gammader) are presumably outputs - confirm
+void gammaall(double r, double ui, double uj, double udi, int xhgammahlp, double zeta, double *gval, double *gder);
+void gammaall1(double r, double ui, double uj, double udi, int xhgammahlp, double zeta, double *dcdr, double *dcdr3);
+void gammagrad(int nn, dvec *x, int *izp, double *uhubb, double *uhder, int *izpxh, double zeta, double *qdiff, int sccmode, double **hgrad);
+void get_gammamat(int nn, dvec *x, int *izp, double *uhubb, double *uhder, double zeta, int *izpxh, double **gammamat, double **gammader);
+
+/* Levenberg--Marquardt optimization */
+// qm_dftb_levmar.c -- NOTE(review): same name and parameter order as dlevmar_der() of the levmar library; presumably a bundled copy - confirm
+int dlevmar_der(
+      void (*func)(double *p, double *hx, int m, int n, void *adata),
+      void (*jacf)(double *p, double *j, int m, int n, void *adata),
+      double *p, double *x, int m, int n, int itmax, double *opts,
+      double *info, double *work, double *covar, void *adata);
+
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_dispersive.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_dispersive.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_dispersive.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_dispersive.c	2013-11-15 18:48:58.000000000 +0100
@@ -0,0 +1,65 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+#include"qm_dftb.h"
+
+/* Sums a tabulated pair potential over all atom pairs and adds its gradient to grad[].
+ * Below the first knot xr[..][0][0] the exponential form built from efkt[] is used; otherwise
+ * the interval holding the distance is located and a cubic polynomial (fifth order on the last
+ * interval) from coeff[] is evaluated. Returns the accumulated energy.
+ * NOTE(review): the original locals and debug print called this term "repulsive" -- confirm. */
+double dispersive(dftb_t *dftb, dvec *x, dvec *grad)
+{
+  int a, b, seg, ta, tb;
+  double dist, etot, slope, dx, head;
+  dvec sep, pull;
+
+  etot = 0.e0;
+  for (a=0; a<dftb->atoms; a++) {
+    ta = dftb->phase1.izp[a];
+    for (b=a+1; b<dftb->atoms; b++) {
+      tb = dftb->phase1.izp[b];
+      dvec_sub(x[b], x[a], sep);
+      dist = dnorm(sep);
+      /* atoms on top of each other, or pair beyond the cutoff: contributes nothing */
+      if (dist < 1.e-2 || dist > dftb->cutoff[ta][tb])
+        continue;
+      if (dist < dftb->xr[ta][tb][0][0]) {
+        /* short range: the exponential is evaluated once and reused for the slope */
+        head  = exp(-dftb->efkt[ta][tb][0]*dist + dftb->efkt[ta][tb][1]);
+        etot += head + dftb->efkt[ta][tb][2];
+        slope = -dftb->efkt[ta][tb][0] * head;
+      } else {
+        /* first interval [xr[seg][0], xr[seg][1]) that holds dist */
+        seg = 0;
+        while (seg < dftb->numint[ta][tb] &&
+               !(dist >= dftb->xr[ta][tb][seg][0] && dist < dftb->xr[ta][tb][seg][1]))
+          seg++;
+        dx = dist - dftb->xr[ta][tb][seg][0];
+        if (seg >= dftb->numint[ta][tb] - 1) {
+          /* last interval: fifth-order polynomial */
+          etot += dftb->coeff[ta][tb][seg][0] + dftb->coeff[ta][tb][seg][1] * dx +
+                  dftb->coeff[ta][tb][seg][2] * dx * dx + dftb->coeff[ta][tb][seg][3] * dx * dx * dx +
+                  dftb->coeff[ta][tb][seg][4] * dx * dx * dx * dx +
+                  dftb->coeff[ta][tb][seg][5] * dx * dx * dx * dx * dx;
+          slope = dftb->coeff[ta][tb][seg][1] + 2 * dftb->coeff[ta][tb][seg][2] * dx +
+                  3 * dftb->coeff[ta][tb][seg][3] * dx * dx + 4 * dftb->coeff[ta][tb][seg][4] * dx * dx * dx +
+                  5 * dftb->coeff[ta][tb][seg][5] * dx * dx * dx * dx;
+        } else {
+          /* inner interval: cubic polynomial */
+          etot += dftb->coeff[ta][tb][seg][0] + dftb->coeff[ta][tb][seg][1] * dx +
+                  dftb->coeff[ta][tb][seg][2] * dx * dx +
+                  dftb->coeff[ta][tb][seg][3] * dx * dx * dx;
+          slope = dftb->coeff[ta][tb][seg][1] + 2 * dftb->coeff[ta][tb][seg][2] * dx +
+                  3 * dftb->coeff[ta][tb][seg][3] * dx * dx;
+        }
+      }
+      /* equal and opposite contributions on the two atoms */
+      dsvmul(slope / dist, sep, pull);
+      dvec_inc(grad[b], pull);
+      dsvmul(-slope / dist, sep, pull);
+      dvec_inc(grad[a], pull);
+    }
+  }
+  return etot;
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_dispersive_dftd3.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_dispersive_dftd3.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_dispersive_dftd3.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_dispersive_dftd3.c	2014-09-08 01:46:28.000000000 +0200
@@ -0,0 +1,565 @@
+//#include<stdio.h>
+//#include<stdlib.h>
+//#include<math.h>
+//#include<string.h>
+#include "qm_dftb.h"
+
+/* lin(i1,i2): index of the unordered pair (i1,i2) in a packed triangular array, min + max*(max+1)/2; arguments are fully parenthesized but evaluated more than once */
+#define lin(i1,i2) (                 \
+  ((i1) < (i2))                      \
+  ?                                  \
+  ((i1) + (i2) * ((i2)+1) / 2)       \
+  :                                  \
+  ((i2) + (i1) * ((i1)+1) / 2)       \
+)
+
+/* dvec_dec(a, b): subtract b from a in place (a -= b).
+ * Kept file-local; the original note says vec.h lacks it. */
+static gmx_inline void dvec_dec(dvec a, const dvec b)
+{
+    /* take all three differences before touching a */
+    const double dx = a[XX] - b[XX];
+    const double dy = a[YY] - b[YY];
+    const double dz = a[ZZ] - b[ZZ];
+
+    a[XX] = dx;
+    a[YY] = dy;
+    a[ZZ] = dz;
+}
+
+/* global ad hoc parameters: k1 enters the coordination-number counting function, k3 the exp(k3*d) weights of the C6 interpolation; k2 is not referenced in this file */
+const double k1=16., k2=4./3., k3=-4.;
+/* loaders for the parameter files found under the given path prefix */
+void get_element_data(double *r2r4, double *rcov, char *path);
+void copy_c6(double c6ab[DFTB_D3_MAXELEM][DFTB_D3_MAXELEM][DFTB_D3_MAXC][DFTB_D3_MAXC][3],
+             int *mxc, char *path);
+void setr0ab(double autoang, double r0ab[DFTB_D3_MAXELEM][DFTB_D3_MAXELEM], char *path);
+/* C6 interpolation; get_dc6_dcnij also returns the derivatives through dc6i and dc6j */
+double getc6(double c6ab[DFTB_D3_MAXELEM][DFTB_D3_MAXELEM][DFTB_D3_MAXC][DFTB_D3_MAXC][3],
+             int *mxc, int iat, int jat, double nci, double ncj);
+double get_dc6_dcnij(double c6ab[DFTB_D3_MAXELEM][DFTB_D3_MAXELEM][DFTB_D3_MAXC][DFTB_D3_MAXC][3],
+                     int mxci, int mxcj, double cni, double cnj, int zi, int zj, double *dc6i, double *dc6j);
+
+/* Prepare the DFT-D3 dispersion data hanging off dftb->dftd3:
+ *   - read the C6 reference table, the r2r4/rcov element data and the r0ab
+ *     radii from the parameter files located under slko_path,
+ *   - allocate the per-atom and per-pair work arrays for dftb->atoms atoms,
+ *   - set the distance cutoffs and the damping parameters.
+ * Always returns 0.
+ */
+int dispersion_dftd3_init(dftb_t *dftb, char *slko_path)
+{
+  dftb_dftd3_t *d3;
+  int natoms, row;
+
+  natoms = dftb->atoms;
+  snew(dftb->dftd3, 1);
+  d3 = dftb->dftd3;
+
+  /* parameter tables come from three text files; the read order is kept */
+  copy_c6(d3->c6ab, d3->mxc, slko_path);
+  get_element_data(d3->r2r4, d3->rcov, slko_path);
+  /* radii: the first argument is the divisor setr0ab applies to each value */
+  setr0ab(10. / NM_TO_BOHR, d3->r0ab, slko_path);
+
+  /* coordination numbers, one per atom */
+  snew(d3->cn,       natoms);
+
+  /* scratch arrays addressed by linearized atom pair */
+  snew(d3->drij,     CHOOSE2(natoms));
+  snew(d3->dcn,      CHOOSE2(natoms));
+  snew(d3->dc6_rest, CHOOSE2(natoms));
+  snew(d3->skip,     CHOOSE2(natoms));
+
+  /* dc6ij is a natoms x natoms matrix: one contiguous slab plus row pointers */
+  snew(d3->dc6ij,    natoms);
+  snew(d3->dc6ij[0], SQR(natoms));
+  for (row = natoms - 1; row >= 1; row--)
+    d3->dc6ij[row] = d3->dc6ij[0] + row * natoms;
+
+  /*
+   * Squared-distance cutoffs (bohr^2 according to the original note).
+   * rthr bounds the pair (N^2) part of the gradient, rthr2 the N^3 part.
+   * With semi-empirical methods or force fields on large (>1000 atoms)
+   * systems rthr2 is crucial for speed; 20^2 to 25^2 bohr is recommended.
+   */
+  d3->rthr  = 9000.; /*  UR, SE */
+  d3->rthr2 = 1600.;
+
+  /* rthr2 is normalized by the first r0ab entry (r0HH in the original note) */
+  d3->rthr2 /= d3->r0ab[0][0];
+
+  /*
+   * Damping parameters: DFT-D3 with Becke-Johnson finite damping,
+   * variant 2 with their radii.
+   * (SE: Alp is only used in 3-body calculations.)
+   * Active set: DFTB3 (zeta=4.0). For comparison, TPSS would be
+   *   rs6 = 0.4535, s18 = 1.9435, rs18 = 4.4752
+   */
+  d3->s6   = 1.;
+  d3->rs6  = 0.7461;
+  d3->s18  = 3.209;
+  d3->rs18 = 4.1906;
+  /* rs8 is not independent: it follows rs18 */
+  d3->rs8  = d3->rs18;
+
+  return 0;
+}
+
+/* Load the C6 reference table from "<path>dftd3-pars.txt": a record count, then records
+ * "C6 code_i code_j CN_i CN_j" with code - 1 = element index + 100 * reference index.
+ * c6ab[ei][ej][ci][cj][0..2] receives {C6, CN_i, CN_j}, the mirrored entry {C6, CN_j, CN_i};
+ * untouched slots stay at -1. mxc[e] = number of reference slots seen for element e.
+ * An over-long path, an unparsable record or an out-of-table index stops the program. */
+void copy_c6(double c6ab[DFTB_D3_MAXELEM][DFTB_D3_MAXELEM][DFTB_D3_MAXC][DFTB_D3_MAXC][3], int *mxc, char *path)
+{
+  FILE *f;
+  char filename[144];
+  int nlines, n, len, ei, ej, ci, cj, u;
+  int iat, jat, iadr, jadr, code_i, code_j;
+  double c6val, cn_i, cn_j;
+
+  len = snprintf(filename, sizeof(filename), "%sdftd3-pars.txt", path);
+  if (len < 0 || len >= (int) sizeof(filename)) {
+    printf("file name too long: %sdftd3-pars.txt\n", path);
+    exit(-1);
+  }
+  f = fopen(filename, "r");
+  if (f == NULL) {
+    printf("cannot read file %s\n", filename);
+    exit(-1);
+  }
+  if (fscanf(f, "%d\n", &nlines) != 1) {
+    printf("cannot read the number of lines from file %s\n", filename);
+    exit(-1);
+  }
+  printf("file %s will be read, %d lines assumed\n", filename, nlines);
+
+  for (ei=0; ei<DFTB_D3_MAXELEM; ei++) {
+    mxc[ei] = 0;
+    for (ej=0; ej<DFTB_D3_MAXELEM; ej++)
+      for (ci=0; ci<DFTB_D3_MAXC; ci++)
+        for (cj=0; cj<DFTB_D3_MAXC; cj++)
+          for (u=0; u<3; u++)
+            c6ab[ei][ej][ci][cj][u] = -1.;
+  }
+
+  for (n=0; n<nlines; n++) {
+    if (fscanf(f, "%lg %d %d %lg %lg\n", &c6val, &code_i, &code_j, &cn_i, &cn_j) != 5) {
+      printf("file %s: cannot parse record %d\n", filename, n+1);
+      exit(-1);
+    }
+    /* decode: code - 1 = element index + 100 * reference index */
+    iat = (code_i - 1) % 100;  iadr = (code_i - 1) / 100;
+    jat = (code_j - 1) % 100;  jadr = (code_j - 1) / 100;
+    if (code_i < 1 || code_j < 1 || iat >= DFTB_D3_MAXELEM || jat >= DFTB_D3_MAXELEM || iadr >= DFTB_D3_MAXC || jadr >= DFTB_D3_MAXC) {
+      printf("file %s: record %d has indices outside the table (%d %d)\n", filename, n+1, code_i, code_j);
+      exit(-1);
+    }
+    if (iadr >= mxc[iat]) mxc[iat] = iadr + 1;
+    if (jadr >= mxc[jat]) mxc[jat] = jadr + 1;
+
+    c6ab[iat][jat][iadr][jadr][0] = c6val;
+    c6ab[iat][jat][iadr][jadr][1] = cn_i;
+    c6ab[iat][jat][iadr][jadr][2] = cn_j;
+    c6ab[jat][iat][jadr][iadr][0] = c6val;
+    c6ab[jat][iat][jadr][iadr][1] = cn_j;
+    c6ab[jat][iat][jadr][iadr][2] = cn_i;
+  }
+
+  printf("done with file %s\n", filename);
+  fclose(f);
+}
+
+/*
+  scale r4/r2 values of the atoms by sqrt(Z) 
+  sqrt is also globally close to optimum
+  together with the factor 1/2 this yields reasonable
+  c8 for he, ne and ar. for larger Z, C8 becomes too large
+  which effectively mimics higher R^n terms neglected due
+  to stability reasons
+      
+  r2r4 =sqrt(0.5*r2r4(i)*dfloat(i)**0.5 ) with i=elementnumber
+  the large number of digits is just to keep the results consistent
+  with older versions. They should not imply any higher accuracy than
+  the old values
+*/
+
+/* Read r2r4[] and then rcov[] (DFTB_D3_MAXELEM values each) from "<path>dftd3-elemdata.txt".
+ * The file name is formatted with a bound so a long path cannot overrun filename[]; an over-long
+ * path, a wrong count or a short/malformed file stops the program. */
+void get_element_data(double *r2r4, double *rcov, char *path)
+{
+  FILE *f;
+  char filename[144] = "";
+  int i, npar, len;
+
+  len = snprintf(filename, sizeof(filename), "%sdftd3-elemdata.txt", path);
+  f = (len < 0 || len >= (int) sizeof(filename)) ? NULL : fopen(filename, "r");
+  if (f == NULL) {
+    printf("cannot read file %s\n", filename);
+    exit(-1);
+  }
+  if (fscanf(f, "%d", &npar) != 1) npar = -1; /* unreadable count: caught by the comparison below */
+  printf("file %s will be read, (twice) %d parameters present (%d expected)\n", filename, npar, DFTB_D3_MAXELEM);
+  if (npar != DFTB_D3_MAXELEM) {
+    printf("these numbers must be equal!\n   ...  failure\n\n");
+    exit(-1);
+  }
+  for (i=0; i<2*DFTB_D3_MAXELEM; i++)
+    if (fscanf(f, "%lf", i < DFTB_D3_MAXELEM ? r2r4 + i : rcov + (i - DFTB_D3_MAXELEM)) != 1) {
+      printf("file %s: value %d is missing or malformed\n", filename, i+1);
+      exit(-1);
+    }
+  printf("done with file %s\n", filename);
+  fclose(f);
+}
+
+/* Read the pairwise radii from "<path>dftd3-r0ab.txt": a count, then the lower triangle (row i, columns 0..i).
+ * Each value is divided by autoang and stored in r[i][j] and r[j][i]. An over-long path, a wrong count or a short/malformed file stops the program. */
+void setr0ab(double autoang, double r[DFTB_D3_MAXELEM][DFTB_D3_MAXELEM], char *path)
+{
+  FILE *f;
+  char filename[144] = "";
+  int i, j, npar, len;
+  double r0ab;
+
+  /* bounded formatting: a long path must not overrun filename[] */
+  len = snprintf(filename, sizeof(filename), "%sdftd3-r0ab.txt", path);
+  f = (len < 0 || len >= (int) sizeof(filename)) ? NULL : fopen(filename, "r");
+  if (f == NULL) {
+    printf("cannot read file %s\n", filename);
+    exit(-1);
+  }
+  if (fscanf(f, "%d", &npar) != 1) npar = -1; /* unreadable count: caught by the comparison below */
+  printf("file %s will be read, %d parameters present (%d expected)\n", filename, npar, CHOOSE2(DFTB_D3_MAXELEM));
+  if (npar != CHOOSE2(DFTB_D3_MAXELEM)) {
+    printf("these numbers must be equal!\n   ...  failure\n\n");
+    exit(-1);
+  }
+  for (i=0; i<DFTB_D3_MAXELEM; i++)
+     for (j=0; j<=i; j++) {
+        if (fscanf(f, "%lf", &r0ab) != 1) {
+          printf("file %s: entry %d %d is missing or malformed\n", filename, i, j);
+          exit(-1);
+        }
+        r[i][j] = r[j][i] = r0ab / autoang;
+     }
+  printf("done with file %s\n", filename);
+  fclose(f);
+}
+
+double dispersion_dftd3(dftb_t *dftb, dvec *g)
+{
+  dftb_dftd3_t *d3;
+  /* Computes the DFT-D3 dispersion energy (returned) and ADDS its gradient to g[]. Coordinates come from dftb->phase1.x, parameters and work arrays from dftb->dftd3. */
+  /* number of atoms */
+  int n;
+  int iat, jat, zi, zj, i, kat;
+  double r0, c6, r42;
+  double r2, r, r4, r6, r8, t6, t8;
+  double edisp, gnorm;
+  double s8, a1, a2;
+
+  dvec rij, delta_g;
+  double rcovij, dc6, expterm;
+  int  linij,linik,linjk;
+
+  n  = dftb->atoms;
+  d3 = dftb->dftd3;
+
+  /*
+   * Coordination numbers:
+   * cn[i] is the sum, over every other atom within the rthr2 cutoff, of the
+   * counting function 1 / (1 + exp(-k1 * ((rcov_i + rcov_j) / r - 1))).
+   */
+
+  for (i=0; i<n; i++) {
+    d3->cn[i] = 0.;
+    for (iat=0; iat<n; iat++)
+      if (iat != i) {
+        dvec_sub(dftb->phase1.x[iat], dftb->phase1.x[i], rij);
+        r2 = dnorm2(rij);
+        if (r2 > d3->rthr2) /* beyond cut-off; note that the squared distance is compared */
+          continue;
+
+        r  = sqrt(r2);
+        /* element indices (stored element value minus one), used to index rcov[] */
+        zi = dftb->element[dftb->atomtype[i]] - 1;
+        zj = dftb->element[dftb->atomtype[iat]] - 1;
+        /* counting function exponential has a better long-range behavior than MHGs inverse damping */
+        d3->cn[i] += 1. / (1. + exp(-k1 * ((d3->rcov[zi] + d3->rcov[zj]) / r - 1.)));
+      }
+  }
+
+  /* check that all parameters have been loaded and are reasonable; the program exits otherwise */
+  for (iat=0; iat<n-1; iat++)
+    for (jat=iat+1; jat<n; jat++) {
+      zi  = dftb->element[dftb->atomtype[iat]] - 1;
+      zj  = dftb->element[dftb->atomtype[jat]] - 1;
+      if (d3->r0ab[zj][zi] < .1) {
+         printf("\nDFTB -- dispersion D3 problem: \n   %d %d %d %d\n   RADIUS MISSING\n\n", iat, jat, zi, zj);
+         exit(-1);
+      }
+      c6 = getc6(d3->c6ab, d3->mxc, zi, zj, d3->cn[iat], d3->cn[jat]);
+      if (c6 < 1.e-6) {
+         printf("\nDFTB -- dispersion D3 problem: \n   %d %d %d %d %f %f\n   C6 MISSING\n\n", iat, jat, zi, zj, d3->cn[iat], d3->cn[jat]);
+         exit(-1);
+      }
+    }
+
+  /*
+   * Energy and gradient:
+   * dE/dr_ij is first accumulated per pair in drij[] (pair terms plus the
+   * three-atom terms of the kat loop) and then mapped onto Cartesian components.
+   */
+
+
+  /* work arrays allocated in dispersion_dftd3_init (sizes as given in the original note):
+   * double drij[n*(n+1)/2];     d(E)/d(r_ij), derivative wrt. the distance iat-jat
+   * double dcn[n*(n+1)/2];      dCN(iat)/d(r_ij), which is equal to dCN(jat)/d(r_ij)
+   * double dc6_rest[n*(n+1)/2]; saves (1/r^6*f_dmp + 3*r4r2/r^8*f_dmp) for the kat loop
+   * double dc6ij[n][n];         dC6(iat,jat)/dCN(iat) in dc6ij(i,j)
+   *                             dC6(iat,jat)/dCN(jat) in dc6ij(j,i)
+   * int skip[n*(n+1)/2];        (bool) set for pairs at or beyond the rthr2 cutoff
+   */
+
+  for (iat=0; iat<n; iat++)
+    for (jat=0; jat<n; jat++)
+      d3->dc6ij[iat][jat] = 0.;
+
+  /* precompute for analytical part */
+  /* Becke-Johnson finite damping: local shorthands for rs6, rs8 and s18 */
+
+  a1 = d3->rs6;
+  a2 = d3->rs8;
+  s8 = d3->s18;
+
+  edisp = 0.;
+
+  for (i=0; i<CHOOSE2(n); i++) {
+    d3->drij[i]     = 0.;
+    d3->dc6_rest[i] = 0.;
+    d3->dcn[i]      = 0.;
+    d3->skip[i]     = 0;
+  }
+
+  for (iat=0; iat<n; iat++)
+    for (jat=0; jat<iat; jat++) {
+      dvec_sub(dftb->phase1.x[jat], dftb->phase1.x[iat], rij);
+      r2 = dnorm2(rij);
+      if (r2 > d3->rthr)
+        continue;
+
+      zi              = dftb->element[dftb->atomtype[iat]] - 1;
+      zj              = dftb->element[dftb->atomtype[jat]] - 1;
+      linij           = lin(iat, jat);
+      d3->skip[linij] = 0;
+      r0              = d3->r0ab[zj][zi]; /* overwritten below by the BJ radius before any use */
+      r42             = d3->r2r4[zi] * d3->r2r4[zj];
+      rcovij          = d3->rcov[zi] + d3->rcov[zj];
+
+      /* get_dc6_dcnij returns C6(iat,jat) and stores the derivatives dC6(iat,jat)/dCN(iat)
+       * and dC6(iat,jat)/dCN(jat) in dc6ij[iat][jat] and dc6ij[jat][iat] for the kat loop
+       */
+
+      c6 = get_dc6_dcnij(d3->c6ab, d3->mxc[zi], d3->mxc[zj],
+                    d3->cn[iat], d3->cn[jat], zi, zj,
+                    &(d3->dc6ij[iat][jat]), &(d3->dc6ij[jat][iat]));
+
+      r  = sqrt(r2);
+      r4 = SQR(r2);
+      r6 = r4*r2;
+      r8 = r6*r2;
+
+      /* use BJ radius (replaces the r0ab value read above) */
+      r0 = a1 * sqrt(3. * r42) + a2;
+      /* t6, t8: damped denominators built from r^6, r^8 and powers of r0 (HEX/OCT presumably 6th/8th power -- confirm) */
+      t6 = r6 + HEX(r0);
+      t8 = r8 + OCT(r0);
+
+      d3->drij[linij] -= d3->s6 * c6 *  6. *       r4 * r / SQR(t6)
+                       +     s8 * c6 * 24. * r42 * r6 * r / SQR(t8);
+
+      d3->dc6_rest[linij] = d3->s6 / t6 + 3. * s8 * r42 / t8;
+
+      /* calculate E_disp */
+      edisp -= d3->dc6_rest[linij] * c6;
+
+      /* Calculate dCN(iat)/dr_ij which is identical to dCN(jat)/dr_ij;
+       * this is needed for dC6/dr_ij and is
+       * saved in dcn for the kat loop
+       */
+
+      if (r2 < d3->rthr2) {
+        expterm        = exp(-k1 * (rcovij/r - 1.));
+        d3->dcn[linij] = -k1 * rcovij * expterm / (SQR(r) * SQR(expterm+1.));
+
+        /* Combine dC6/dCN * dCN/dr_ij to get dC6/dr_ij */
+        dc6 = (d3->dc6ij[iat][jat] + d3->dc6ij[jat][iat]) * d3->dcn[linij];
+
+        /* dc6_rest holds every factor of the pair energy except C6 itself (saved for the kat loop) */
+        d3->drij[linij] += d3->dc6_rest[linij]*dc6;
+        /* d(C6(ij))/d(r_ij) */
+
+      } else {
+        dc6             = 0.;
+        d3->dcn[linij]  = 0.;
+        d3->skip[linij] = 1;
+      }
+
+      /*
+       * The kat loop calculates the contributions of dC6(i,k)/dr_ij,
+       * dC6(j,k)/dr_ij, dC6(ij)/dr_ik, dC6(ij)/dr_jk,
+       * i.e. all terms that depend on the coordinates of 3 atoms.
+       * This is the reason why the gradient scales as N^3.
+       */
+      for (kat=0; kat<jat; kat++) {
+
+        linik = lin(iat,kat);
+        linjk = lin(jat,kat);
+
+        d3->drij[linij] += d3->dc6_rest[linik] * d3->dc6ij[iat][kat] * d3->dcn[linij]
+                        +  d3->dc6_rest[linjk] * d3->dc6ij[jat][kat] * d3->dcn[linij];
+
+        if (!(d3->skip[linjk]))
+          d3->drij[linjk] += d3->dc6_rest[linik] * d3->dc6ij[kat][iat] * d3->dcn[linjk]
+                          +  d3->dc6_rest[linij] * d3->dc6ij[jat][iat] * d3->dcn[linjk];
+
+        if (!(d3->skip[linik]))
+          d3->drij[linik] += d3->dc6_rest[linjk] * d3->dc6ij[kat][jat] * d3->dcn[linik]
+                          +  d3->dc6_rest[linij] * d3->dc6ij[iat][jat] * d3->dcn[linik];
+      } /* kat */
+
+    } /* jat iat */
+
+  /* After calculating all derivatives dE/dr_ij w.r.t. distances,
+   * the gradient w.r.t. the coordinates is formed as dE/dr_ij * dr_ij/dxyz_i.
+   * NOTE(review): r is used as a divisor with no guard against coincident atoms. */
+  for (iat=1; iat<n; iat++)
+    for (jat=0; jat<iat; jat++) {
+      dvec_sub(dftb->phase1.x[jat], dftb->phase1.x[iat], rij);
+      r = dnorm(rij);
+      dsvmul(d3->drij[lin(iat,jat)] / r, rij, delta_g);
+      dvec_inc(g[iat], delta_g);
+      dvec_dec(g[jat], delta_g);
+    }
+  /* diagnostic only: sum of |component| over the whole g array (not a Euclidean norm); it includes whatever g held on entry */
+  gnorm = 0.;
+  for (i=0; i<3; i++)
+    for (jat=0; jat<n; jat++)
+      gnorm += fabs(g[jat][i]);
+  printf("\n|G| = %12.8f\n", gnorm);
+  printf("D3 EDISP = %f a.u. = %f kJ/mol\n", edisp, edisp * HARTREE_TO_KJMOL);
+
+  return edisp;
+}
+
+/*
+ *CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
+ *C      The   N E W   gradC6 routine    C
+ *CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
+ */
+
+/* Interpolated C6 for the element pair (izi, izj) at coordination numbers (cni, cnj), together
+ * with its derivatives with respect to cni and cnj (written to *dc6i and *dc6j).
+ * Each positive reference C6 is weighted by exp(k3 * d), where d is the squared distance between
+ * (cni, cnj) and that reference's coordination numbers. If the weights sum to (almost) nothing,
+ * the C6 of the closest reference is returned and both derivatives are set to zero. */
+double get_dc6_dcnij(double c6ab[DFTB_D3_MAXELEM][DFTB_D3_MAXELEM][DFTB_D3_MAXC][DFTB_D3_MAXC][3], int mxci, int mxcj,
+                     double cni, double cnj, int izi, int izj, double *dc6i, double *dc6j)
+{
+  int ri, rj;
+  const double *ref;
+  double w, d, c6ref, cn_refi, cn_refj;
+  double cwsum    = 0., wsum    = 0.;   /* weighted C6 sum and plain weight sum */
+  double dcwsum_i = 0., dwsum_i = 0.;   /* their derivatives with respect to cni */
+  double dcwsum_j = 0., dwsum_j = 0.;   /* ... and with respect to cnj */
+  double nearest_c6 = -1.e99;           /* fallback value */
+  double nearest_d  = 9999.;
+
+  for (ri=0; ri<mxci; ri++) {
+    for (rj=0; rj<mxcj; rj++) {
+      ref   = c6ab[izi][izj][ri][rj];
+      c6ref = ref[0];
+      if (!(c6ref > 0.))
+        continue; /* slot without a reference value */
+
+      cn_refi = ref[1];
+      cn_refj = ref[2];
+      d = SQR(cn_refi-cni) + SQR(cn_refj-cnj);
+      if (d < nearest_d) {
+        nearest_d  = d;
+        nearest_c6 = c6ref;
+      }
+
+      w         = exp(k3*d);
+      cwsum    += c6ref * w;
+      wsum     +=         w;
+      dcwsum_i += c6ref * w * 2. * k3 * (cni-cn_refi);
+      dwsum_i  +=         w * 2. * k3 * (cni-cn_refi);
+      dcwsum_j += c6ref * w * 2. * k3 * (cnj-cn_refj);
+      dwsum_j  +=         w * 2. * k3 * (cnj-cn_refj);
+    }
+  }
+
+  if (!(wsum > 1.e-99)) {
+    /* no usable weight: closest reference, flat derivatives */
+    *dc6i = 0.;
+    *dc6j = 0.;
+    return nearest_c6;
+  }
+
+  /* quotient rule on cwsum / wsum */
+  *dc6i = ((dcwsum_i*wsum) - (dwsum_i*cwsum)) / SQR(wsum);
+  *dc6j = ((dcwsum_j*wsum) - (dwsum_j*cwsum)) / SQR(wsum);
+  return cwsum / wsum;
+}
+
+/*
+ *CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
+ *C interpolate c6  
+ *CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
+ */
+
+/* C6 coefficient of the element pair (iat, jat) at coordination numbers (nci, ncj):
+ * the mean of the positive reference values in c6ab weighted by exp(k3 * d), d being the
+ * squared distance of (nci, ncj) from the reference coordination numbers; mxc[] gives the
+ * number of reference slots scanned per element. The exponential is sensitive to numerics
+ * when nci or ncj is far from the references, so if the weights sum to (almost) zero the
+ * value of the closest reference is returned (-1.e+99 if no reference qualified). */
+double getc6(double c6ab[DFTB_D3_MAXELEM][DFTB_D3_MAXELEM][DFTB_D3_MAXC][DFTB_D3_MAXC][3],
+             int *mxc, int iat, int jat, double nci, double ncj)
+{
+  int a, b;
+  const double *ref;
+  double value, weight, d, cn_a, cn_b;
+  double wsum = 0.0, cwsum = 0.0;
+  double best_c6 = -1.e+99, best_d = 1000.;
+
+  for (a=0; a<mxc[iat]; a++) {
+    for (b=0; b<mxc[jat]; b++) {
+      ref   = c6ab[iat][jat][a][b];
+      value = ref[0];
+      if (!(value > 0.))
+        continue; /* empty slot */
+
+      cn_a = ref[1];
+      cn_b = ref[2];
+      /* squared distance in coordination-number space */
+      d    = SQR(cn_a-nci) + SQR(cn_b-ncj);
+      /* remember the closest reference as a fallback */
+      if (d < best_d) {
+        best_d  = d;
+        best_c6 = value;
+      }
+      weight = exp(k3*d);
+      wsum  += weight;
+      cwsum += weight * value;
+    }
+  }
+
+  if (wsum > 1.e-99)
+    return cwsum / wsum;
+
+  return best_c6;
+}
+
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_eglcao.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_eglcao.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_eglcao.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_eglcao.c	2015-02-20 16:54:51.985335321 +0100
@@ -0,0 +1,1275 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+#include<time.h>
+//#include"charge_transfer.h"
+#include"qm_dftb.h"
+
+/*      PROGRAM DYLCAO */
+/*     ================ */
+
+/*     Copyright 1991 by Peter Blaudeck, Dirk Porezag */
+
+/* ********************************************************************* */
+
+/*     PROGRAM CHARACTERISTICS */
+/*     ----------------------- */
+
+/* DYLCAO calculates the dynamics of various systems */
+/* within a two-centre SETB formalism */
+
+/* ********************************************************************* */
+/*
+
+     PHASE 1 FOR CHARGE TRANSFER -- CALC OF FRAGMENTS
+
+*/
+/* ********************************************************************* */
+
+
+/*     SUBROUTINE EGLCAO */
+/*     ================= */
+
+/*     Copyright 1997 by Peter Blaudeck, Dirk Porezag, Michael Haugk, */
+/*                       Joachim Elsner */
+
+/* ********************************************************************* */
+
+/*     PROGRAM CHARACTERISTICS */
+/*     ----------------------- */
+
+/* eglcao calculates energy and gradient for dylcao as shown by Seifert. */
+/* The determination of the occupation numbers has been changed to be */
+/* also valid for metallic systems. */
+
+/* PARAMETERS: */
+/* nn      i  number of atoms */
+/* x       r  coordinates (n3) */
+/* eel     r  electronic energy */
+/* miter   i  number of scf-iterations performed */
+/* qmat    r */
+
+/* ********************************************************************* */
+
+// dvec_dec: a := a - b, component by component (the original note says include/vec.h lacks it)
+static gmx_inline void dvec_dec(dvec a,const dvec b)
+{
+  dvec diff;
+  // form all three differences before a is overwritten
+  diff[XX] = a[XX] - b[XX];
+  diff[YY] = a[YY] - b[YY];
+  diff[ZZ] = a[ZZ] - b[ZZ];
+
+  a[XX] = diff[XX];
+  a[YY] = diff[YY];
+  a[ZZ] = diff[ZZ];
+}
+
+// adopted from src/gmxlib/pbc.c
+// Difference dx = x1 - x2 with every component wrapped into [-length/2, length/2], where
+// length is the diagonal box entry box[i][i] scaled by NM_TO_BOHR (off-diagonal entries are
+// not used). A non-positive (or NaN) length leaves the component unwrapped: the wrapping
+// loops could never terminate for it.
+static inline void pbc_dx_dftb(matrix box, const dvec x1, const dvec x2, dvec dx)
+{
+    int i;
+    double length, half;
+
+    for(i=0; i<DIM; i++) {
+        dx[i] = x1[i] - x2[i];
+        length = (double) box[i][i] * NM_TO_BOHR;
+        if (!(length > 0.)) continue; /* no periodic image along this axis */
+        half = length / 2.;
+        while (dx[i] > half)  dx[i] -= length;
+        while (dx[i] < -half) dx[i] += length;
+    }
+}
+
+// LAPACK wrappers: hand every scalar to the Fortran symbol by address and return its INFO value.
+static long dsygv(long itype, char jobz, char uplo, long n, double *a, long lda,
+             double *b, long ldb, double *w, double *work, long lwork)
+{
+  extern void dsygv_(long *, char *, char *, long *, double *, long *, double *, long *, double *, double *, long *, long *);
+  // zeroed first: a LAPACK whose INTEGER is narrower than long writes only part of info
+  long info = 0;
+  dsygv_(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work, &lwork, &info);
+  return info;
+}
+
+// Same for dsygvd_, which additionally takes the integer workspace iwork (liwork presumably its length, per LAPACK DSYGVD).
+static long dsygvd(long itype, char jobz, char uplo, long n, double *a, long lda,
+             double *b, long ldb, double *w, double *work, long lwork, long *iwork, long liwork)
+{
+  extern void dsygvd_(long *, char *, char *, long *, double *, long *, double *, long *, double *, double *, long *, long *, long *, long *);
+  long info = 0; // zeroed first, for the same reason as in dsygv
+  dsygvd_(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work, &lwork, iwork, &liwork, &info);
+  return info;
+}
+
+/* Print the label s followed by the elapsed time (end - start) in nanoseconds. */
+void print_time_difference(char *s, struct timespec start, struct timespec end)
+{
+  long long value;
+
+  /* whole seconds are widened to long long before scaling to ns, then the ns parts are added */
+  value = 1000000000ll * ((long long) end.tv_sec - (long long) start.tv_sec) + (long long) (end.tv_nsec - start.tv_nsec);
+
+  printf("%s %12lld\n", s, value);
+}
+
+/* NDIM = NORB !!! */
+
+double run_dftb1(dftb_t *dftb, rvec f[], rvec fshift[], t_commrec *cr, t_forcerec *fr, matrix box) // i - nucleobase to be calculated
+// int eglcao(int nn, double x[NNDIM][3], double *eel, int *miter, double qmat[NNDIM], int phase)
+{
+  const double scftol = 1.e-9;
+  const double almix = 0.2;
+  //const int maxiter = 70;
+  
+  int indj, indk, indj1, indk1;
+  double eel, ecoul, ecoul3, efermi, eelold, eext, erep, edisp;
+  //
+  int i, j, k, l, m, n, li, lj, niter, xx, yy, zz;
+  //int nmaofo, ii, jj, kk, ll, jfo, jao;
+  //double r2;
+  double qtot, r;
+  // lapack
+  long ier; // ndim;
+  char c;
+
+  int nn, ne;
+  dvec *x, bond, dgr, dgr_cumul, qx, sum_qx;
+  double vol;
+  //real bondnorm, bondnorm2, invbondnorm;
+  double dbondnorm;
+  double r_1, r_c, r_d, big_a, big_b, big_c, big_k; // for switched cut-off QM/MM
+  dftb_phase1_t dftb1;
+
+  // for PME
+  double charge_checksum, fscal;
+  matrix vir_pme;
+  real energy_pme[1];
+  const int flags_pme_pot_only = GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_ENER_VIR | GMX_PME_CALC_POT;
+  const int flags_pme_forces = GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_ENER_VIR | GMX_PME_CALC_F;
+  t_nblist QMMMlist = fr->QMMMlist;
+  energy_pme[0] = (real) 1.0;
+  // static int initialized_pme = 0;
+  // t_gmx_pme *pmedata;
+  t_pbc pbc;
+  //rvec x_pbc, xe_pbc, rbond;
+  int status;
+  static int mdstep = -1;
+
+  static double *qold_pme=NULL;
+
+  static struct timespec time_dftbstart, time_dftbstop, time_1, time_2, time_3, time_4, time_sccstart, time_sccstop;
+
+  clock_gettime(CLOCK_MONOTONIC, &time_dftbstart);
+  print_time_difference("MM+ETC   TIME:", time_dftbstop, time_dftbstart);
+
+  // printf("Phase 1, site %d\n", ibase+1);
+
+  dftb1 = dftb->phase1;
+  nn = dftb1.nn;
+  ne = dftb1.ne;
+  x = dftb1.x;
+  if (qold_pme == NULL)
+    snew(qold_pme, nn);
+
+  // check if the manual QM/MM neighborsearching shall be done
+  mdstep++;
+  if (mdstep - dftb->lastlist_pme >= dftb->nstlist_pme) {
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    do_neighborlist_for_dftb(dftb, box);
+    dftb->lastlist_pme = mdstep;
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB NS  TIME:", time_1, time_2);
+    //printf("SW_NUM_MM\n");
+  }
+
+  /* LENGTHS MUST BE CONVERTED TO BOHR! (5.292d-11 m) */
+
+/*
+  // write out the coordinates
+  printf("%d\ntest coordinates\n", nn);
+  for (n=0; n<nn; n++) {
+    switch (dftb1.izp[n]) {case 0: c='O';break; case 1: c='H';break; case 2: c='C';break; case 3: c='N';break;} 
+    printf("%c %f %f %f\n", c, x[n][XX]*0.529177249, x[n][YY]*0.529177249, x[n][ZZ]*0.529177249);
+  }
+*/
+/*
+  // write out the external charges
+  for (n=0; n<ne; n++) {
+    printf("%f %f %f %f\n", dftb1.xe[n][XX]*0.529177249, dftb1.xe[n][YY]*0.529177249, dftb1.xe[n][ZZ]*0.529177249, dftb1.ze[n]);
+  }
+*/
+  
+  /* set the initial charges
+  for (i=0; i<nn; i++)
+    dftb1.qmat[i] = dftb->qzero1[dftb1.izp[i]];
+  THIS IS NOW DONE ONLY ONCE AT THE DFTB INITIALIZATION
+  AND NOT REPEATED ANY FURTHER!
+  --> POSSIBLE SPEEDUP!
+  */
+
+  // initial setup
+
+  //set_pbc(&pbc, epbcXYZ, box);
+
+  eel = 0.0;
+
+  // printf("  icycle iter niter   e(total)\n");
+  // printf("====================================\n");
+
+  // setup of charge-independent part of H and S
+  for (j=0; j<nn; j++)
+    for (k=0; k<=j; k++) {
+      slkmatrices(j, k, x, dftb1.au, dftb1.bu, dftb->lmax, dftb->dim1, dftb->dr1, dftb1.izp, dftb->skstab1, dftb->skhtab1, dftb->skself1);
+      for (n=0; n<dftb1.ind[k+1]-dftb1.ind[k]; n++)
+        for (m=0; m<dftb1.ind[j+1]-dftb1.ind[j]; m++) {
+          dftb1.hamil[dftb1.ind[j]+m][dftb1.ind[k]+n] = dftb1.au[m][n];
+          dftb1.hamil[dftb1.ind[k]+n][dftb1.ind[j]+m] = dftb1.au[m][n];      
+          dftb1.overl[dftb1.ind[j]+m][dftb1.ind[k]+n] = dftb1.bu[m][n];
+          dftb1.overl[dftb1.ind[k]+n][dftb1.ind[j]+m] = dftb1.bu[m][n];
+	}
+    }
+
+/*
+  printf("Hamiltonian matrix:\n");
+  for (j=0; j<6; j++) {
+    for (k=0; k<6; k++) printf("%10.6f", dftb1.hamil[j][k]);
+    printf("\n");
+  }
+  printf("Overlap matrix:\n");
+  for (j=0; j<6; j++) {
+    for (k=0; k<6; k++) printf("%10.6f", dftb1.overl[j][k]);
+    printf("\n");
+  }
+*/
+
+  switch (dftb->cutoff_qmmm) {
+  double r;
+  case 1: // QM/MM with switched cut-off
+    //int under_r1, under_rc;
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    r_1 = dftb->rcoulomb_pme * NM_TO_BOHR;
+    r_d = QMMM_DFTB_SWITCH * NM_TO_BOHR;
+    r_c = r_1 + r_d;
+    printf("SWITCHED CUT-OFF START\n");
+    big_a =   (5 * r_c - 2 * r_1) / (CUB(r_c) * SQR(r_d));
+    big_b = - (4 * r_c - 2 * r_1) / (CUB(r_c) * CUB(r_d));
+    big_c = - 1 / r_c - big_a / 3 * CUB(r_d) - big_b / 4 * QRT(r_d);
+    for (j=0; j<nn; j++) { // do it for every QM atom
+      dftb1.pot[j] = 0.;
+      // add potential only from MM atoms in the neighbor list!
+      //under_r1 = under_rc = 0;
+      for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+        l = dftb1.neighbor_pme[j][k];
+        pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+        dbondnorm = dnorm(bond);
+        if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+          printf("QM--MM exploding for QM=%d, MM=%d. MM charge is %f\n", j+1, l+1, dftb1.ze[l]);
+          continue;
+        }
+        if (dbondnorm < r_1) {
+          dftb1.pot[j] += dftb1.ze[l] * (1. / dbondnorm + big_c);
+          //under_r1++;
+          continue;
+        }
+        if (dbondnorm < r_c) {
+          dftb1.pot[j] += dftb1.ze[l] * ( 1. / dbondnorm + big_a / 3. * CUB(dbondnorm - r_1) + big_b / 4. * QRT(dbondnorm - r_1) + big_c);
+          //printf("Interaction  QM=%d, MM=%d: contrib = %f\n", j+1, l+1, dftb1.ze[l] / dnorm(bond));
+          //printf("Interaction  QM=%d, MM=%d: contrib %f\n", j+1, l+1, dbondnorm);
+          //under_rc++;
+          continue;
+        }
+          // else:
+          //printf("Interaction  QM=%d, MM=%d: ignored %f\n", j+1, l+1, dbondnorm);
+      }
+      dftb1.shiftE[j] = dftb1.pot[j];
+      //printf("SW_NUM_MM %d: %d %d\n", j+1, under_r1, under_rc);
+    }
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB QMM TIME:", time_1, time_2);
+    printf("SWITCHED CUT-OFF END\n");
+    break;
+  case 2: // reaction field with epsilon=infinity
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    r_c = dftb->rcoulomb_pme * NM_TO_BOHR;
+    printf("REACTION FIELD START\n");
+    big_c = 3. / (2. * r_c);
+    for (j=0; j<nn; j++) { // do it for every QM atom
+      dftb1.pot[j] = 0.;
+      // add potential only from MM atoms in the neighbor list!
+      for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+        l = dftb1.neighbor_pme[j][k];
+        pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+        dbondnorm = dnorm(bond);
+        if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+          printf("QM--MM exploding for QM=%d, MM=%d. MM charge is %f\n", j+1, l+1, dftb1.ze[l]);
+          continue;
+        }
+        if (dbondnorm < r_c) {
+          dftb1.pot[j] += dftb1.ze[l] * ( 1. / dbondnorm + SQR(dbondnorm) / 2. / CUB(r_c) - big_c);
+          //printf("Interaction  QM=%d, MM=%d: contrib = %f\n", j+1, l+1, dftb1.ze[l] / dnorm(bond));
+          //printf("Interaction  QM=%d, MM=%d: contrib %f\n", j+1, l+1, dbondnorm);
+          continue;
+        }
+          // else:
+          //printf("Interaction  QM=%d, MM=%d: ignored %f\n", j+1, l+1, dbondnorm);
+      }
+      dftb1.shiftE[j] = dftb1.pot[j];
+      //printf("SW_NUM_MM %d: %d %d\n", j+1, under_r1, under_rc);
+    }
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB RXF TIME:", time_1, time_2);
+    printf("REACTION FIELD END\n");
+    break;
+  case 3: // shifted cut-off, in a similar spirit as reaction field with epsilon=infinity
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    r_c = dftb->rcoulomb_pme * NM_TO_BOHR;
+    printf("SHIFTED CUT-OFF START\n");
+    big_c = 3. / SQR(r_c);
+    big_k = 3. / r_c;
+    //for (r=0.3*NM_TO_BOHR; r<=r_c; r+=0.005*NM_TO_BOHR)
+    //  printf("%f %16.12f %16.12f\n", r/NM_TO_BOHR, 1./r, 1. / r - SQR(r) / CUB(r_c) + big_c * r - big_k);
+    for (j=0; j<nn; j++) { // do it for every QM atom
+      dftb1.pot[j] = 0.;
+      // add potential only from MM atoms in the neighbor list!
+      for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+        l = dftb1.neighbor_pme[j][k];
+        pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+        dbondnorm = dnorm(bond);
+        if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+          printf("QM--MM exploding for QM=%d, MM=%d. MM charge is %f\n", j+1, l+1, dftb1.ze[l]);
+          continue;
+        }
+        if (dbondnorm < r_c) {
+          dftb1.pot[j] += dftb1.ze[l] * ( 1. / dbondnorm - SQR(dbondnorm) / CUB(r_c) + big_c * dbondnorm - big_k);
+          //printf("Interaction  QM=%d, MM=%d: contrib = %f\n", j+1, l+1, dftb1.ze[l] / dnorm(bond));
+          //printf("Interaction  QM=%d, MM=%d: contrib %f\n", j+1, l+1, dbondnorm);
+          continue;
+        }
+          // else:
+          //printf("Interaction  QM=%d, MM=%d: ignored %f\n", j+1, l+1, dbondnorm);
+      }
+      dftb1.shiftE[j] = dftb1.pot[j];
+      //printf("SW_NUM_MM %d: %d %d\n", j+1, under_r1, under_rc);
+    }
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB QMM TIME:", time_1, time_2);
+    printf("SHIFTED CUT-OFF END\n");
+    break;
+  default: // QM/MM PME preparation -- short-range QM--MM component (real-space)
+    // using fr->ewaldcoeff_q, which has the dimension of 1/distance
+    printf("EWALD-SR START\n");
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    for (j=0; j<nn; j++) { // do it for every QM atom
+      dftb1.pot4[j] = 0.;
+      // add SR potential only from MM atoms in the neighbor list!
+      for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+        l = dftb1.neighbor_pme[j][k];
+        pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+        dbondnorm = dnorm(bond);
+        if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+          printf("QM/MM PME QM--MM short range exploding for QM=%d, MM=%d. MM charge is %f\n", j+1, l+1, dftb1.ze[l]);
+        } else {
+          if (dbondnorm < fr->rcoulomb * NM_TO_BOHR) {
+            dftb1.pot4[j] += dftb1.ze[l] / dbondnorm * (double) gmx_erfc(fr->ewaldcoeff_q * (real) dbondnorm / NM_TO_BOHR);
+            ////printf("Ewald-SR for QM=%d, MM=%d: contrib = %f\n", j+1, l+1, dftb1.ze[l] / dnorm(bond) * gmx_erfc(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR));
+            //printf("Ewald-SR for QM=%d, MM=%d: contrib %f\n", j+1, l+1, dbondnorm);
+          } else {
+            //printf("Ewald-SR for QM=%d, MM=%d: ignored %f\n", j+1, l+1, dbondnorm);
+          }
+        }
+      }
+    }
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB SR  TIME:", time_1, time_2);
+    printf("EWALD-SR END\n");
+    // end of PME preparation
+  }
+
+  // setup for SCC cycle
+
+  eelold = 1.e10;
+  /* pre-calculate gamma matrix and the derivative
+   * for all atom pairs! */
+  get_gammamat(nn, x, dftb1.izp, dftb->uhubb1, dftb->uhder1, dftb->zeta1,
+		  dftb1.izpxh, dftb1.gammamat, dftb1.gammader);
+
+  /*
+  printf("Gamma matrix:\n");
+  for (i=0; i<3; i++) {
+	  for (j=0; j<3; j++)
+		  printf("%10.6f", dftb1.gammamat[i][j]);
+	  printf("\n");
+  }
+  printf("\n");
+  printf("Gamma deriv:\n");
+  for (i=0; i<3; i++) {
+	  for (j=0; j<3; j++)
+		  printf("%10.6f", dftb1.gammader[i][j]);
+	  printf("\n");
+  }
+  printf("\n");
+  */
+
+  //printf("Number of electrons: dftb1.nel = %d\n", dftb1.nel);
+
+  clock_gettime(CLOCK_MONOTONIC, &time_sccstart);
+  print_time_difference("DFTB PRE TIME:", time_dftbstart, time_sccstart);
+
+  /* SCC cycle starts here */
+  for (niter=0; niter<MAXITER_BROYDEN; niter++) {
+    clock_gettime(CLOCK_MONOTONIC, &time_4);
+    print_time_difference("SCC ITER TIME:", time_3, time_4);
+    time_3 = time_4;
+    /* save old charges */
+    for (i=0; i<nn; i++)
+      dftb1.qmold[i] = dftb1.qmat[i];
+    
+    /*
+    printf("qmat - start SCF:\n");
+    for (i=0; i<nn; i++)
+      printf("%3d %f\n", i+1, dftb1.qmat[i]);
+    */
+
+    /* charge-independent part of H and S */
+    for (j=0; j<nn; j++) {
+      indj = dftb1.ind[j];
+      indj1 = dftb1.ind[j+1];
+      for (k=0; k<nn; k++) {
+        indk = dftb1.ind[k]; 
+        indk1 = dftb1.ind[k+1];
+        for (n=0; n<indk1-indk; n++)
+          for (m=0; m<indj1-indj; m++) {
+            dftb1.a[indj+m][indk+n] = dftb1.hamil[indj+m][indk+n];
+            dftb1.b[indj+m][indk+n] = dftb1.overl[indj+m][indk+n];
+          }
+      }
+    }
+
+    if (! dftb->cutoff_qmmm) {
+      /* calculate the effect of environment with PME
+       * including periodic images of QM charges
+       */
+      if ((dftb->partial_pme == 0) || (niter == 0)) {
+      
+        // FULL PME ALWAYS IN THE FIRST ITERATION
+        for (j=0; j<nn; j++) {
+              dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+              dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+              dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+              dftb1.q_pme[j]    = (real) (-dftb1.qmat[j] + dftb->qzero1[dftb1.izp[j]]) * fr->qr->mm->scalefactor;
+              //printf("Charge %d = %f\n", j, (-dftb1.qmat[j] + dftb->qzero1[dftb1.izp[j]]));
+        }
+        for (j=0; j<ne; j++) {
+              dftb1.x_pme[nn + j][0] = (real) dftb1.xe[j][0] / NM_TO_BOHR;
+              dftb1.x_pme[nn + j][1] = (real) dftb1.xe[j][1] / NM_TO_BOHR;
+              dftb1.x_pme[nn + j][2] = (real) dftb1.xe[j][2] / NM_TO_BOHR;
+              dftb1.q_pme[nn + j]    = (real) dftb1.ze[j];
+        }
+        
+        for (j=0; j<nn; j++)
+          dftb1.pot[j] = 0.0;
+        
+        init_nrnb(dftb1.nrnb_pme);
+        clock_gettime(CLOCK_MONOTONIC, &time_1);
+        gmx_pme_do_dftb(fr->pmedata, 0, nn+ne, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, box, cr, 0, 0,
+              	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_pot_only, dftb1.pot);
+        clock_gettime(CLOCK_MONOTONIC, &time_2);
+        print_time_difference("DFTB PME TIME:", time_1, time_2);
+       
+        for (j=0; j<nn; j++) {
+          dftb1.pot[j] *= KJMOL_TO_HARTREE;
+          qold_pme[j] = dftb1.qmat[j];
+          dftb1.pot6[j] = 0;
+        }
+      } else {
+      // LIMITED PME IN EVERY FOLLOWING ITERATION
+        for (j=0; j<nn; j++) {
+              dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+              dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+              dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+              dftb1.q_pme[j]    = (real) (-dftb1.qmat[j] + qold_pme[j]) * fr->qr->mm->scalefactor;
+        }
+      
+        clock_gettime(CLOCK_MONOTONIC, &time_1);
+        init_nrnb(dftb1.nrnb_pme);
+        gmx_pme_do_dftb(fr->pmedata, 0, nn, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, box, cr, 0, 0,
+              	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_pot_only, dftb1.pot6);
+        clock_gettime(CLOCK_MONOTONIC, &time_2);
+        print_time_difference("DFTB PML TIME:", time_1, time_2);
+      
+        for (j=0; j<nn; j++)
+          dftb1.pot6[j] *= KJMOL_TO_HARTREE;
+      }
+      
+      // PME -- corrections
+      for (j=0; j<nn; j++) {
+        // exclude the QM-QM interactions as the shift will be calculated in DFTB for these interactions
+        dftb1.pot2[j] = 0.;
+        for (k=0; k<nn; k++)
+          if (j != k) {
+            dvec_sub(dftb1.x[j], dftb1.x[k], bond);
+            dftb1.pot2[j] -= QM_CHARGE(k) * gmx_erf(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR) / dnorm(bond);
+          }
+        dftb1.pot2[j] *= fr->qr->mm->scalefactor;
+        // the "on-site" contribution to energy
+        dftb1.pot3[j] = - 2. * fr->ewaldcoeff_q / NM_TO_BOHR * QM_CHARGE(j) / sqrt(M_PI) * fr->qr->mm->scalefactor;
+      }
+      // surface correction term
+      // ATTENTION: modified update_QMMM_coord() (qmmm.c) is needed here!
+      if (dftb->surf_corr_pme) {
+        // sum_j q_j vec(x_j)
+        clear_dvec(sum_qx);
+	for (j=0; j<nn; j++) {
+          dsvmul(QM_CHARGE(j), dftb1.x[j], qx);
+          dvec_inc(sum_qx, qx);
+	}
+	for (j=0; j<ne; j++) {
+          dsvmul(dftb1.ze[j], dftb1.xe[j], qx);
+          dvec_inc(sum_qx, qx);
+	}
+	// contribution to the potential
+	vol = box[XX][XX] * box[YY][YY] * box[ZZ][ZZ] * CUB(NM_TO_BOHR);
+	for (j=0; j<nn; j++) {
+	  dftb1.pot7[j] = 4. * M_PI / 3. / vol / (double) dftb->surf_corr_pme * diprod(dftb1.x[j], sum_qx);
+        }
+      }
+      
+      // charge-dependent (modified) Klopman--Ohno correction
+      for (j=0; j<nn; j++)
+        dftb1.pot5[j] = 0.;
+      if (dftb->cdko)
+        //cdkopotential(dftb, &QMMMlist, indexMM);
+        cdkopotential(dftb, box);
+      
+      // save the calculated ESP as the external shift
+      for (j=0; j<nn; j++) {
+        dftb1.shiftE[j] = dftb1.pot[j] + dftb1.pot2[j] + dftb1.pot3[j] + dftb1.pot4[j] + dftb1.pot5[j] + dftb1.pot6[j] + dftb1.pot7[j];
+          // * fr->qr->mm->scalefactor; -- DO NOT DO THIS,
+          //                               BECAUSE THIS HAS BEEN DONE IN THE QM/MM INTERFACE
+        //printf("SHIFTE ATOM %3d: %12.7f\n", j+1, dftb1.shiftE[j]);
+        //printf("POT %3d %12.7f %12.7f %12.7f %12.7f\n", j+1, dftb1.pot[j], dftb1.pot2[j], dftb1.pot3[j], dftb1.pot4[j]);
+      }
+      // end PME
+    } // if(!cutoff_qmmm)
+
+    // calculate atomic hamilton shift (= sum over gamma*charge)
+    for (i=0; i<nn; i++) {
+      dftb1.shift[i]   = - dftb1.shiftE[i];
+      dftb1.shift3[i]  = 0.0;
+      dftb1.shift3a[i] = 0.0;
+      for (j=0; j<nn; j++) {
+        // dftb1.shift[i] += - QM_CHARGE(j) * (i>j ? dftb1.gammamat[i][j] : dftb1.gammamat[j][i]);
+        dftb1.shift[i] += - QM_CHARGE(j) * dftb1.gammamat[i][j];
+        if (dftb->sccmode == 3) {
+	  dftb1.shift3[i]  +=   - QM_CHARGE(j)  * dftb1.gammader[i][j];
+	  dftb1.shift3a[i] += SQR(QM_CHARGE(j)) * dftb1.gammader[j][i];
+	}
+      }
+      if (dftb->sccmode == 3)
+	dftb1.shift3[i] *= - QM_CHARGE(i);
+    }
+    /* void hamilshift(int nn, double *qmat, double *qzero, int *izp, double *qdiff, double **gammamat, double **gammader,
+		int sccmode, double *shift, double *shift3, double *shift3a) */
+
+/*
+    printf("Qmat    ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.qmat[i]);
+    printf("\n");
+    printf("Charges ");
+    for (i=0; i<10; i++) printf("%12.6f", QM_CHARGE(i));
+    printf("\n");
+    printf("ShiftE  ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.shiftE[i]);
+    printf("\n");
+    printf("Shift   ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.shift[i]);
+    printf("\n");
+    printf("Shift3  ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.shift3[i]);
+    printf("\n");
+    printf("Shift3A ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.shift3a[i]);
+    printf("\n");
+    printf("\n");
+*/
+/*
+    printf("Shift   ");
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shift[i]);
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shift3[i]);
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shift3a[i]);
+    printf("\n");
+    printf("ShiftE  ");
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shiftE[i]);
+    printf("\n");
+    printf("Shift+ShiftE");
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shiftE[i]+dftb1.shift[i]);
+    printf("\n");
+*/
+
+    /* update the Hamilton matrix
+     * shift3 and shift3a == 0 if sccmode != 3
+     */
+    for (i=0; i<nn; i++)
+      for (li=0; li < SQR(dftb->lmax[dftb1.izp[i]]); li++)
+        for (j=0; j<=i; j++)
+          for (lj=0; lj < SQR(dftb->lmax[dftb1.izp[j]]); lj++) 
+            dftb1.a[dftb1.ind[i]+li][dftb1.ind[j]+lj] += 0.5 * dftb1.overl[dftb1.ind[i]+li][dftb1.ind[j]+lj] * 
+		    (dftb1.shift[i] + dftb1.shift[j] /* sccmode == 2 */
+		     + (2.*dftb1.shift3[i] + dftb1.shift3a[i] + 2.*dftb1.shift3[j] + dftb1.shift3a[j]) / 3.); /* sccmode == 3 */
+    
+    // transpose the arrays a and b
+    for (j=0; j<dftb1.ndim; j++)
+      for (i=0; i<dftb1.ndim; i++) {
+        dftb1.a_trans[j*dftb1.ndim+i] = dftb1.a[i][j];
+        dftb1.b_trans[j*dftb1.ndim+i] = dftb1.b[i][j];
+      }
+
+    // print out the array a
+    /*
+    printf("A before dsygv\n");
+    for (i=0; i<10; i++) {
+      for (j=0; j<10; j++) printf ("%9.5f", dftb1.a_trans[i*dftb1.ndim+j]);
+      printf("\n");
+    }
+    */
+
+    ier = -512;
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    // ier = dsygv(1, 'V', 'L', dftb1.ndim, dftb1.a_trans, dftb1.ndim, dftb1.b_trans, dftb1.ndim, dftb1.ev, dftb1.aux, 3*dftb1.ndim);
+    ier = dsygvd(1, 'V', 'L', dftb1.ndim, dftb1.a_trans, dftb1.ndim, dftb1.b_trans, dftb1.ndim, dftb1.ev,
+                 dftb1.aux, 1 + 6*dftb1.ndim + 2*SQR(dftb1.ndim), dftb1.iaux, 3 + 5*dftb1.ndim);
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB DIA TIME:", time_1, time_2);
+    if ((int) ier) {
+      printf("\nDSYGVD: ier = %d\nEXITING!\n\n", (int) ier);
+      exit(-1);
+    }
+    for (j=0; j<dftb1.ndim; j++)
+      for (i=0; i<dftb1.ndim; i++)
+        dftb1.a[i][j] = dftb1.a_trans[j*dftb1.ndim+i];
+
+    /*
+    printf("\n");
+    printf("ier = %d\n", (int) ier);
+    printf("\n");
+    */
+
+    // print out the array a
+    /*
+    printf("A after dsygv\n");
+    for (i=0; i<10; i++) {
+      for (j=0; j<10; j++) printf ("%9.5f", dftb1.a[i][j]);
+      printf("\n");
+    }
+    */
+
+    // calculate occupation (occ) and Fermi energy (efermi),
+    fermi(dftb1.ndim, dftb1.ev, dftb1.occ, &efermi, dftb1.nel, dftb1.telec);
+    // for (i=0; i<dftb1.ndim; i++)
+    //   printf("%d: %f %f\n", i+1, dftb1.ev[i], dftb1.occ[i]);
+
+    // sum of occupied eigenvalues
+    eel = 0.0;
+    for (i=0; i<dftb1.ndim && dftb1.occ[i] > dftb->dacc; i++)
+      eel += dftb1.occ[i] * dftb1.ev[i];
+
+    // determine Mulliken charges, charge of the whole system and the mulliken
+    mulliken(nn, dftb1.qmat, dftb1.qmulli, &qtot, dftb1.ndim, dftb1.occ, dftb1.a, dftb1.overl, dftb1.ind, dftb->lmax, dftb1.izp);
+    
+/*
+    charge_checksum = 0.;
+    for (i=0; i<nn; i++)
+      charge_checksum += QM_CHARGE(i);
+    printf("sum of qmat after mulliken = %f\n", charge_checksum);
+*/
+    /*
+    printf("qmat - after mulliken:\n");
+    for (i=0; i<nn; i++)
+      printf("%3d %f\n", i+1, dftb1.qmat[i]);
+    */
+
+    // complete calculation of electronic energy
+    // charge-dependent contribution
+    // warning: this will only lead to the right result if convergence has been reached
+    ecoul = ecoul3 = eext = 0.0;
+
+    // note, if CDKO calculation is being done:
+    //   charges have changed, affecting the QM-MM interactions,
+    //   therefore these have to be recalculated now!
+    // charge-dependent (modified) Klopman--Ohno correction
+    if (dftb->cdko) {
+      for (j=0; j<nn; j++)
+        dftb1.pot5[j] = 0.;
+      //cdkopotential(dftb, &QMMMlist, indexMM);
+      cdkopotential(dftb, box);
+      for (j=0; j<nn; j++)
+        dftb1.shiftE[j] = dftb1.pot[j] + dftb1.pot5[j];
+    }
+
+    for (i=0; i<nn; i++) {
+      ecoul  += dftb1.shift[i] * (dftb1.qmat[i] + dftb->qzero1[dftb1.izp[i]]);
+      ecoul3 += dftb1.shift3[i] * (dftb1.qmat[i] + dftb->qzero1[dftb1.izp[i]]) + dftb1.shift3a[i] * dftb1.qmat[i];
+      eext   += dftb1.shiftE[i]  * QM_CHARGE(i);
+      if (dftb->cdko)
+        eext += 2. * (dftb1.shiftE[i] + dftb1.shiftE2[i]) * dftb1.qmat[i]; // the factor of 2 because eext will be divided by 2 below
+    }
+    eel += - ecoul/2. - ecoul3/3. + eext/2.;
+    // remark: eel contains shiftE already via ev,
+    // shift also contains -shiftE, i.e. ecoul also
+    // contains contributions from EXT
+
+    // print energy
+    //printf("iter: %d, E= %14.9f\n", niter, eel);
+
+    // check convergence
+    if (fabs(eel-eelold) < scftol)
+      break;
+    eelold = eel;
+
+    // Broyden mixing
+    broyden(niter, almix, nn, dftb1.qmold, dftb1.qmat, dftb->broyden);
+    for (i=0; i<nn; i++)
+      dftb1.qmat[i] = dftb1.qmold[i];
+
+/*
+    charge_checksum = 0.;
+    for (i=0; i<nn; i++)
+      charge_checksum += QM_CHARGE(i);
+    printf("sum of qmat after broyden = %f\n", charge_checksum);
+*/
+    //printf("qmat - after Broyden:\n");
+    //for (i=0; i<(nn<=10?nn:10); i++)
+      //printf(" %8.5f", dftb1.qmat[i]);
+      //printf("%3d %f\n", i+1, dftb1.qmat[i]);
+      //printf("\n");
+
+  } // end SCC cycle
+
+  if (dftb->surf_corr_pme) {
+    printf("total dip. moment %12.4f D\n", 2.54174623 * dnorm(sum_qx));
+    printf("volume %12.7f nm^3\n", vol / CUB(NM_TO_BOHR));
+    printf("POT7:");
+    for (j=0; j<nn; j++) {
+      printf("%12.7f", dftb1.pot7[j]);
+    }
+    printf("\n");
+  }
+
+/*
+  printf("qmat - after SCC:\n");
+  for (i=0; i<(nn<=10?nn:10); i++)
+    printf(" %8.5f", dftb1.qmat[i]);
+  printf("\n");
+*/
+/*
+  printf("Shift   ");
+  for (i=0; i<3; i++) printf(" %8.5f", dftb1.shift[i]);
+  for (i=0; i<3; i++) printf(" %8.5f", dftb1.shift3[i]);
+  for (i=0; i<3; i++) printf(" %8.5f", dftb1.shift3a[i]);
+  for (i=0; i<3; i++) printf(" %8.5f", dftb1.shiftE[i]);
+  printf("\n");
+*/
+
+/*
+  printf("SHIFTE ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.shiftE[j]);
+  printf("\n");
+  printf("POT  ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot[j]);
+  printf("\n");
+  printf("POT2 ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot2[j]);
+  printf("\n");
+  printf("POT3 ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot3[j]);
+  printf("\n");
+  printf("POT4 ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot4[j]);
+  printf("\n");
+*/
+
+  printf("final eigenvalues:");
+  for (i=dftb1.nel/2-2; i<dftb1.nel/2; i++)
+    printf(" %8.5f (%5.3f)", dftb1.ev[i], dftb1.occ[i]);
+  printf(" FERMI");
+  for (i=dftb1.nel/2; i<dftb1.nel/2+2; i++)
+    printf(" %8.5f (%5.3f)", dftb1.ev[i], dftb1.occ[i]);
+  printf("\n");
+
+  /* debug */
+  printf ("niter = %d, eel = %12.6f, ecoul = %12.6f, ecoul3 = %12.6f, eext = %12.6f,\n", niter, eel, ecoul, ecoul3, eext);
+  //printf ("niter = %d, eel = %12.6f, ecoul = %12.6f, ecoul3 = %12.6f, eext = %12.6f, ", niter, eel, ecoul, ecoul3, eext);
+
+  clock_gettime(CLOCK_MONOTONIC, &time_sccstop);
+
+  // write out the eigenvalues
+
+/*
+  for (i=0; i<dftb1.ndim; i++)
+    printf("%2d %f\n", i+1, dftb1.ev[i]);
+*/
+  // outspec(nn, dftb1.ndim, dftb1.ind, dftb1.ev, dftb1.occ, efermi, dftb1.qmat, dftb1.qmulli, dftb, dftb1);
+  // outeigenvectors(dftb1.a, dftb1.ev, dftb1.ind, nn, dftb1);
+
+  // printf("%5d %5d / %2d %14.6f\n", 1, 1, niter, eel);
+  // printf("\n***** end of dftb *****\n");
+
+  /* CONTINUE HERE WITH REPULSION AND FORCES! */
+
+  // calculate atomic hamilton shift (= sum over gamma*charge)
+    for (i=0; i<nn; i++) {
+      dftb1.shift[i]   = - dftb1.shiftE[i];
+      dftb1.shift3[i]  = 0.0;
+      dftb1.shift3a[i] = 0.0;
+      for (j=0; j<nn; j++) {
+        // dftb1.shift[i] += - QM_CHARGE(j) * (i>j ? dftb1.gammamat[i][j] : dftb1.gammamat[j][i]);
+        dftb1.shift[i] += - QM_CHARGE(j) * dftb1.gammamat[i][j];
+        if (dftb->sccmode == 3) {
+	  dftb1.shift3[i]  +=   - QM_CHARGE(j)  * dftb1.gammader[i][j];
+	  dftb1.shift3a[i] += SQR(QM_CHARGE(j)) * dftb1.gammader[j][i];
+	}
+      }
+      if (dftb->sccmode == 3)
+	dftb1.shift3[i] *= - QM_CHARGE(i);
+    }
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.grad[i]);
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.partgrad[i]);
+  usual_gradient(dftb, x, dftb1.partgrad);
+/*
+  printf("gradient components from usual_gradient\n");
+  for (i=0; i<nn; i++)
+    printf("%5d%12.8f%12.8f%12.8f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  printf("end gradient components from usual_gradient\n");
+*/
+  for (i=0; i<nn; i++)
+    copy_dvec(dftb1.partgrad[i], dftb1.grad[i]);
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.partgrad[i]);
+  gamma_gradient(dftb, x, dftb1.partgrad);
+/*
+  printf("gradient components from gamma_gradient\n");
+  for (i=0; i<nn; i++)
+    printf("%5d%12.8f%12.8f%12.8f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  printf("end gradient components from gamma_gradient\n");
+*/
+  for (i=0; i<nn; i++)
+    dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+
+  /* externalchgrad */ /* DO IT HERE WITH PME SOMEHOW - BOTH QM AND MM! */
+
+  /*
+  printf("TEST\n");
+  printf("rlist = %f\n", fr->rlist);
+  printf("rcoulomb = %f\n", fr->rcoulomb);
+  printf("rcoulomb_switch = %f\n", fr->rcoulomb_switch);
+  printf("ewaldcoeff_q = %f\n", fr->ewaldcoeff_q);
+  printf("contrib at cutoff = %f\n", gmx_erfc(fr->ewaldcoeff_q * fr->rcoulomb) / fr->rcoulomb);
+  printf("contrib at 0.7 nm = %f\n", gmx_erfc(fr->ewaldcoeff_q * 0.7) / 0.7);
+  printf("contrib at 0.5 nm = %f\n", gmx_erfc(fr->ewaldcoeff_q * 0.5) / 0.5);
+  printf("END TEST\n");
+  */
+
+  for (j=0; j<nn; j++)
+    clear_dvec(dftb1.partgrad[j]);
+  for (j=0; j<ne; j++)
+    clear_dvec(dftb1.mmgrad[j]);
+
+  switch (dftb->cutoff_qmmm) {
+  double r;
+  case 1:
+    printf("SWITCHED CUT-OFF GRADIENTS START\n");
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    r_1 = dftb->rcoulomb_pme * NM_TO_BOHR;
+    r_d = QMMM_DFTB_SWITCH * NM_TO_BOHR;
+    r_c = r_1 + r_d;
+    big_a =   (5 * r_c - 2 * r_1) / (CUB(r_c) * SQR(r_d));
+    big_b = - (4 * r_c - 2 * r_1) / (CUB(r_c) * CUB(r_d));
+    for (j=0; j<nn; j++) { // do it for every QM atom
+      // add SR potential only from MM atoms in the neighbor list!
+      for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+        l = dftb1.neighbor_pme[j][k];
+        pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+        dbondnorm = dnorm(bond);
+        if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+          printf("QM/MM PME QM--MM short range exploding for QM=%d, MM=%d. MM charge is %f\n", j+1, l+1, dftb1.ze[l]);
+	  continue;
+        }
+        if (dbondnorm < r_1) {
+          fscal = - QM_CHARGE(j) * dftb1.ze[l] / CUB(dbondnorm);
+          dsvmul(fscal, bond, dgr);
+          //printf("SR: QM %1d -- MM %1d:%12.7f%12.7f%12.7f\n", j+1, k+1, dgr[XX], dgr[YY], dgr[ZZ]);
+          dvec_inc(dftb1.partgrad[j], dgr);
+          dvec_dec(dftb1.mmgrad[l], dgr);
+	  continue;
+        }
+        if (dbondnorm < r_c) {
+          fscal = - QM_CHARGE(j) * dftb1.ze[l] / dbondnorm * (1. / SQR(dbondnorm)
+                - big_a * SQR(dbondnorm - r_1) - big_b * CUB(dbondnorm - r_1));
+          dsvmul(fscal, bond, dgr);
+          dvec_inc(dftb1.partgrad[j], dgr);
+          dvec_dec(dftb1.mmgrad[l], dgr);
+	  continue;
+	}
+	// else
+	// ... beyond cutoff+switch, nothing to do
+      }
+    }
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB F-Q TIME:", time_1, time_2);
+    printf("SWITCHED CUT-OFF GRADIENTS END\n");
+    break;
+  case 2:
+    printf("REACTION FIELD GRADIENTS START\n");
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    r_c = dftb->rcoulomb_pme * NM_TO_BOHR;
+    for (j=0; j<nn; j++) { // do it for every QM atom
+      // add SR potential only from MM atoms in the neighbor list!
+      for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+        l = dftb1.neighbor_pme[j][k];
+        pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+        dbondnorm = dnorm(bond);
+        if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+          printf("QM/MM PME QM--MM short range exploding for QM=%d, MM=%d. MM charge is %f\n", j+1, l+1, dftb1.ze[l]);
+	  continue;
+        }
+        if (dbondnorm < r_c) {
+          fscal = - QM_CHARGE(j) * dftb1.ze[l] / dbondnorm * (1. / SQR(dbondnorm) - dbondnorm / CUB(r_c));
+          dsvmul(fscal, bond, dgr);
+          dvec_inc(dftb1.partgrad[j], dgr);
+          dvec_dec(dftb1.mmgrad[l], dgr);
+	  continue;
+	}
+	// else
+	// ... beyond cutoff+switch, nothing to do
+      }
+    }
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB F-Q TIME:", time_1, time_2);
+    printf("REACTION FIELD GRADIENTS END\n");
+    break;
+  case 3:
+    printf("SHIFTED CUT-OFF GRADIENTS START\n");
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    r_c = dftb->rcoulomb_pme * NM_TO_BOHR;
+    big_c = 3. / SQR(r_c);
+    //for (r=0.3*NM_TO_BOHR; r<=r_c; r+=0.005*NM_TO_BOHR)
+    //  printf("%f %16.12f %16.12f\n", r/NM_TO_BOHR, 1./SQR(r), 1./SQR(r) + 2.*r/CUB(r_c) - big_c);
+    for (j=0; j<nn; j++) { // do it for every QM atom
+      // add SR potential only from MM atoms in the neighbor list!
+      for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+        l = dftb1.neighbor_pme[j][k];
+        pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+        dbondnorm = dnorm(bond);
+        if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+          printf("QM/MM PME QM--MM short range exploding for QM=%d, MM=%d. MM charge is %f\n", j+1, l+1, dftb1.ze[l]);
+	  continue;
+        }
+        if (dbondnorm < r_c) {
+          fscal = - QM_CHARGE(j) * dftb1.ze[l] / dbondnorm * (1. / SQR(dbondnorm) + 2. * dbondnorm / CUB(r_c) - big_c);
+          dsvmul(fscal, bond, dgr);
+          dvec_inc(dftb1.partgrad[j], dgr);
+          dvec_dec(dftb1.mmgrad[l], dgr);
+	  continue;
+	}
+      }
+    }
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB F-Q TIME:", time_1, time_2);
+    printf("SHIFTED CUT-OFF GRADIENTS END\n");
+    break;
+  default:
+    /* QM gradient */
+    // the coordinates and charges of QM atoms
+    for (j=0; j<nn; j++) {
+          dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+          dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+          dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+          dftb1.q_pme[j]    = (real) QM_CHARGE(j) * fr->qr->mm->scalefactor;
+    }
+    // the coordinates and the charges of MM atoms
+    for (j=0; j<ne; j++) {
+          dftb1.x_pme[nn + j][0] = (real) dftb1.xe[j][0] / NM_TO_BOHR;
+          dftb1.x_pme[nn + j][1] = (real) dftb1.xe[j][1] / NM_TO_BOHR;
+          dftb1.x_pme[nn + j][2] = (real) dftb1.xe[j][2] / NM_TO_BOHR;
+          dftb1.q_pme[nn + j]    = (real) dftb1.ze[j];
+    }
+    // PME -- long-range component
+    init_nrnb(dftb1.nrnb_pme);
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    gmx_pme_do_dftb(fr->pmedata, 0, nn+ne, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, box, cr, 0, 0,
+        	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_forces, dftb1.pot);
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB F-1 TIME:", time_1, time_2);
+    for (j=0; j<nn; j++)
+      for (m=0; m<DIM; m++)
+        dftb1.partgrad[j][m] = - dftb1.f_pme[j][m] / HARTREE_BOHR2MD; // partgrad is gradient, i.e. the negative of force
+    /*
+    printf("gradient components to QM - LR\n");
+    for (i=0; i<nn; i++)
+      printf("%3d%12.7f%12.7f%12.7f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+    */
+    //printf("PME corrections - checkpoint 1\n");
+    // PME corrections -- exclude QM--QM interaction
+    //printf("gradient QM/MM correction - QM--QM exclusions\n");
+    for (j=0; j<nn; j++) {
+      // exclude the QM--QM interactions -- gradient of contribution to potential dftb1.pot2[]
+      // note that the gradient of contribution to potential dftb1.pot3[] vanishes!
+      for (k=0; k<j; k++) {
+        dvec_sub(dftb1.x[j], dftb1.x[k], bond);
+        // negative of gradient -- we want to subtract it from partgrad
+        fscal = QM_CHARGE(j) * QM_CHARGE(k) / SQR(dnorm(bond)) * fr->qr->mm->scalefactor *
+               (gmx_erf(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR) / dnorm(bond)
+                - M_2_SQRTPI * fr->ewaldcoeff_q / NM_TO_BOHR * exp(-SQR(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR)));
+        dsvmul(fscal, bond, dgr); // vec(dgr) = fscal * vec(bond)
+        dvec_inc(dftb1.partgrad[j], dgr);
+        dvec_dec(dftb1.partgrad[k], dgr);
+        //printf("%1d-%1d:%12.7f%12.7f%12.7f\n", j+1, k+1, dgr[XX], dgr[YY], dgr[ZZ]);
+      }
+    }
+    //printf("PME corrections - checkpoint 2\n");
+
+    /* MM gradient -- LR component to QM/MM */
+    // the coordinates and charges of QM atoms
+    for (j=0; j<nn; j++) {
+          dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+          dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+          dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+          dftb1.q_pme[j]    = (real) QM_CHARGE(j) * fr->qr->mm->scalefactor;
+    }
+    // the coordinates and the charges of MM atoms
+    for (j=0; j<ne; j++) {
+          dftb1.x_pme[nn + j][0] = (real) dftb1.xe[j][0] / NM_TO_BOHR;
+          dftb1.x_pme[nn + j][1] = (real) dftb1.xe[j][1] / NM_TO_BOHR;
+          dftb1.x_pme[nn + j][2] = (real) dftb1.xe[j][2] / NM_TO_BOHR;
+          dftb1.q_pme[nn + j]    = 0.; // ASK GERRIT IF THIS IS REALLY NOT INCLUDED IN GROMACS MM CALCULATIONS!
+    }
+    /*
+    charge_checksum = 0.;
+    for (j=0; j<nn; j++)
+      charge_checksum += dftb1.q_pme[j];
+    printf("  PME charge checksum QM = %f\n", charge_checksum);
+    charge_checksum = 0.;
+    for (j=nn; j<nn+ne; j++)
+      charge_checksum += dftb1.q_pme[j];
+    printf("  PME charge checksum MM = %f\n", charge_checksum);
+    */
+    //PME
+    init_nrnb(dftb1.nrnb_pme);
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    gmx_pme_do_dftb_mm_forces(fr->pmedata, 0, nn+ne, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, box, cr, 0, 0,
+        	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_forces);
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB F-2 TIME:", time_1, time_2);
+    /*
+    printf("MM FORCES EWALD START\n");
+    for (j=0; j<ne; j++)
+      printf("%5d %9.6f %9.6f %9.6f\n", j+1, dftb1.f_pme[nn + j][XX], dftb1.f_pme[nn + j][YY], dftb1.f_pme[nn + j][ZZ]);
+    printf("MM FORCES EWALD STOP\n");
+    */
+    for (j=0; j<ne; j++) {
+      dftb1.mmgrad[j][XX] = (double) (-dftb1.ze[j] / HARTREE_BOHR2MD * dftb1.f_pme[nn + j][XX]);
+      dftb1.mmgrad[j][YY] = (double) (-dftb1.ze[j] / HARTREE_BOHR2MD * dftb1.f_pme[nn + j][YY]);
+      dftb1.mmgrad[j][ZZ] = (double) (-dftb1.ze[j] / HARTREE_BOHR2MD * dftb1.f_pme[nn + j][ZZ]);
+    } // dsvmul(-dftb1.ze[j] / HARTREE_BOHR2MD, dftb1.f_pme[nn + j], dftb1.mmgrad[j]);
+    //printf("PME corrections - checkpoint 3\n");
+
+    //printf("gradient components to MM - LR\n");
+    //for (i=0; i<10; i++)
+    //  if (dnorm(dftb1.mmgrad[i]) > 0.00001)
+    //    printf("%3d%12.7f%12.7f%12.7f\n", i+1, dftb1.mmgrad[i][XX], dftb1.mmgrad[i][YY], dftb1.mmgrad[i][ZZ]);
+
+    for (j=0; j<nn; j++) { // do it for every QM atom
+      // add SR potential only from MM atoms in the neighbor list!
+      for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+        l = dftb1.neighbor_pme[j][k];
+        pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+        dbondnorm = dnorm(bond);
+        if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+          printf("QM/MM PME QM--MM short range exploding for QM=%d, MM=%d. MM charge is %f\n", j+1, l+1, dftb1.ze[l]);
+        } else {
+          if (dbondnorm < fr->rcoulomb * NM_TO_BOHR) {
+            fscal = QM_CHARGE(j) * dftb1.ze[l] / SQR(dbondnorm) * fr->qr->mm->scalefactor *
+                   (- (double) gmx_erfc(fr->ewaldcoeff_q * dbondnorm / NM_TO_BOHR) / dbondnorm
+                    - M_2_SQRTPI * (double) fr->ewaldcoeff_q / NM_TO_BOHR * exp(-SQR((double) fr->ewaldcoeff_q * dbondnorm / NM_TO_BOHR)));
+            dsvmul(fscal, bond, dgr);
+            //printf("SR: QM %1d -- MM %1d:%12.7f%12.7f%12.7f\n", j+1, k+1, dgr[XX], dgr[YY], dgr[ZZ]);
+            // short-range QM/MM contribution to QM gradient
+            dvec_inc(dftb1.partgrad[j], dgr);
+            // short-range QM/MM contribution to MM gradient
+            dvec_dec(dftb1.mmgrad[l], dgr);
+          }
+        }
+      }
+    }
+    //printf("\nPME corrections - checkpoint 4\n");
+
+    // surface correction term
+    if (dftb->surf_corr_pme) {
+      // sum_j q_j vec(x_j)
+      clear_dvec(sum_qx);
+      for (j=0; j<nn+ne; j++) {
+        dsvmul(dftb1.q_pme[j], dftb1.x_pme[j], qx);
+        dvec_inc(sum_qx, qx);
+      }
+      // is it OK that the QM charges have been scaled down possibly??? (mm->scalefactor)
+      dsvmul(NM_TO_BOHR, sum_qx, sum_qx);
+      // contribution to the potential
+      vol = box[XX][XX] * box[YY][YY] * box[ZZ][ZZ] * CUB(NM_TO_BOHR);
+      // partgrad is gradient, i.e. negative of force
+      for (j=0; j<nn; j++) {
+        dsvmul(4 * M_PI / 3 / vol / (double) dftb->surf_corr_pme * QM_CHARGE(j), sum_qx, dgr);
+        dvec_inc(dftb1.partgrad[j], dgr);
+      }
+      // do this correction for MM atoms as well
+      for (j=0; j<ne; j++) {
+        dsvmul(4 * M_PI / 3 / vol / (double) dftb->surf_corr_pme * dftb1.ze[j], sum_qx, dgr);
+        dvec_inc(dftb1.mmgrad[j], dgr);
+      }
+    }
+    //printf("PME corrections - checkpoint 5\n");
+
+    /*
+    printf("gradient components to MM - complete\n");
+    for (k=0; k<ne; k++)
+      if (dnorm(dftb1.mmgrad[k]) > 0.00001)
+        printf("%3d%12.7f%12.7f%12.7f\n", k+1, dftb1.mmgrad[k][XX], dftb1.mmgrad[k][YY], dftb1.mmgrad[k][ZZ]);
+    */
+    /*
+    printf("gradient components to QM - complete\n");
+    for (i=0; i<nn; i++)
+      printf("%3d%12.7f%12.7f%12.7f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+    */
+
+    break;
+    // end PME
+  } // switch (cutoff_qmmm)
+
+/*
+  printf("TEST - CUT-OFF QM/MM GRADIENTS ON MM ATOMS!\n");
+  for (k=0; k<dftb1.ne; k++) {
+    clear_dvec(dgr_cumul);
+    for (j=0; j<nn; j++) {
+      dvec_sub(dftb1.x[j], dftb1.xe[k], bond);
+      fscal = - (double) QM_CHARGE(j) * dftb1.ze[k] / CUB(dnorm(bond));
+      dsvmul(fscal, bond, dgr);
+      dvec_dec(dgr_cumul, dgr);
+    }
+    if (dnorm2(dgr_cumul) > 0.0000001)
+      printf("%3d%12.7f%12.7f%12.7f\n", k+1, dgr_cumul[XX], dgr_cumul[YY], dgr_cumul[ZZ]);
+  }
+  printf("TEST - CUT-OFF QM/MM GRADIENTS ON QM ATOMS!\n");
+  for (j=0; j<nn; j++) {
+    clear_dvec(dgr_cumul);
+    for (k=0; k<dftb1.ne; k++) {
+      dvec_sub(dftb1.x[j], dftb1.xe[k], bond);
+      fscal = - (double) QM_CHARGE(j) * dftb1.ze[k] / CUB(dnorm(bond));
+      dsvmul(fscal, bond, dgr);
+      dvec_inc(dgr_cumul, dgr);
+    }
+    if (dnorm2(dgr_cumul) > 0.0000001)
+      printf("%3d%12.7f%12.7f%12.7f\n", j+1, dgr_cumul[XX], dgr_cumul[YY], dgr_cumul[ZZ]);
+  }
+*/
+
+  // end PME
+  
+/*
+  // debug output
+  printf("gradient components from externalchgrad\n");
+  for (i=0; i<nn; i++)
+    printf("%3d%12.7f%12.7f%12.7f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  printf("end gradient components from externalchgrad\n");
+*/
+
+  for (i=0; i<nn; i++)
+    dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.partgrad[i]);
+  erep = repulsive(dftb, x, dftb1.partgrad);
+/*
+  printf("gradient components from repulsive\n");
+  for (i=0; i<nn; i++)
+    printf("%5d%12.8f%12.8f%12.8f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  printf("end gradient components from repulsive\n");
+*/
+  for (i=0; i<nn; i++)
+    dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+
+  // ADD THE DISPERSION INTERACTION (IF DESIRED)
+  if (dftb->dispersion) {
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    for (i=0; i<nn; i++)
+      clear_dvec(dftb1.partgrad[i]);
+    switch (dftb->dispersion) {
+      case 1: /* Grimme's DFT-D3 */
+              edisp = dispersion_dftd3(dftb, dftb1.partgrad);
+              break;
+      case 2: /* Elstner's 2001, not yet implemented, cannot happen... */
+              edisp = 0.;
+              break;
+    }
+    for (i=0; i<nn; i++)
+      dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB DIS TIME:", time_1, time_2);
+  } else {
+    edisp = 0.;
+  }
+
+  // ADD THE CORRECTION DUE TO CHARGE-DEPENDENT KLOPMAN--OHNO INTERACTION (IF DESIRED)
+  if (dftb->cdko) {
+    // clear the arrays for the gradients
+    for (i=0; i<nn; i++)
+      clear_dvec(dftb1.partgrad[i]);
+    for (i=0; i<ne; i++)
+      clear_dvec(dftb1.partmmgrad[i]);
+    //cdkograd(dftb, &QMMMlist, indexMM, x, xe, partgrad, partmmgrad);
+    cdkograd(dftb, box, dftb1.partgrad, dftb1.partmmgrad);
+    for (i=0; i<nn; i++)
+      dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+    for (i=0; i<ne; i++)
+      dvec_inc(dftb1.mmgrad[i], dftb1.partmmgrad[i]);
+  }
+
+  printf ("erep = %16.10f      eel+erep = %16.10f     edisp = %16.10f\n", erep, eel+erep, edisp);
+
+  /* copy gradients to the corresponding arrays - copied from call_gaussian() */
+  //printf("begin forces\n");
+  for(i=0; i<nn; i++) {
+//    /*
+    printf("GRAD %3d%12.7f%12.7f%12.7f\n", i+1, -dftb1.grad[i][XX], -dftb1.grad[i][YY], -dftb1.grad[i][ZZ]);
+//    */
+    for(j=0; j<DIM; j++) {
+      f[i][j]      = (real) HARTREE_BOHR2MD * dftb1.grad[i][j];
+      fshift[i][j] = (real) HARTREE_BOHR2MD * dftb1.grad[i][j];
+    }
+  }
+  //printf("  end forces\n");
+  for(i=0; i<ne; i++) {
+    /*
+    if (SQR(dftb1.mmgrad[i][XX]) + SQR(dftb1.mmgrad[i][YY]) + SQR(dftb1.mmgrad[i][ZZ]) > 0.0001)
+      printf("MMGRAD %5d%12.7f%12.7f%12.7f\n", i+1, -dftb1.mmgrad[i][XX], -dftb1.mmgrad[i][YY], -dftb1.mmgrad[i][ZZ]);
+    */
+    for(j=0; j<DIM; j++) {
+      f[i + nn][j]      = (real) HARTREE_BOHR2MD * dftb1.mmgrad[i][j];      
+      fshift[i + nn][j] = (real) HARTREE_BOHR2MD * dftb1.mmgrad[i][j];
+    }
+  }
+
+  clock_gettime(CLOCK_MONOTONIC, &time_dftbstop);
+  print_time_difference("DFTB FRC TIME:", time_sccstop, time_dftbstop);
+  print_time_difference("DFTB     TIME:", time_dftbstart, time_dftbstop);
+
+  return HARTREE2KJ * AVOGADRO * (eel + erep + edisp);
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_eglcao.c.debug gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_eglcao.c.debug
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_eglcao.c.debug	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_eglcao.c.debug	2014-10-01 01:46:07.000000000 +0200
@@ -0,0 +1,1432 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+#include<time.h>
+//#include"charge_transfer.h"
+#include"qm_dftb.h"
+
+/*      PROGRAM DYLCAO */
+/*     ================ */
+
+/*     Copyright 1991 by Peter Blaudeck, Dirk Porezag */
+
+/* ********************************************************************* */
+
+/*     PROGRAM CHARACTERISTICS */
+/*     ----------------------- */
+
+/* DYLCAO calculates the dynamics of various systems */
+/* within a two-centre SETB formalism */
+
+/* ********************************************************************* */
+/*
+
+     PHASE 1 FOR CHARGE TRANSFER -- CALC OF FRAGMENTS
+
+*/
+/* ********************************************************************* */
+
+
+/*     SUBROUTINE EGLCAO */
+/*     ================= */
+
+/*     Copyright 1997 by Peter Blaudeck, Dirk Porezag, Michael Haugk, */
+/*                       Joachim Elsner */
+/*     Bo Song for CMD-QM/MM based on Fragments, April 2007 */
+
+/* ********************************************************************* */
+
+/*     PROGRAM CHARACTERISTICS */
+/*     ----------------------- */
+
+/* eglcao calculates energy and gradient for dylcao as shown by Seifert. */
+/* The determination of the occupation numbers has been changed to be */
+/* also valid for metallic systems. */
+
+/* PARAMETERS: */
+/* nn      i  number of atoms */
+/* x       r  coordinates (n3) */
+/* eel     r  electronic energy */
+/* miter   i  number of scf-iterations performed */
+/* qmat    r */
+
+/* ********************************************************************* */
+
+// In-place component-wise subtraction a -= b on dvec operands.
+// (Defined locally because include/vec.h does not seem to provide it.)
+static gmx_inline void dvec_dec(dvec a,const dvec b)
+{
+  // evaluate all three differences first, then store them
+  const double dx = a[XX] - b[XX];
+  const double dy = a[YY] - b[YY];
+  const double dz = a[ZZ] - b[ZZ];
+
+  a[XX] = dx;
+  a[YY] = dy;
+  a[ZZ] = dz;
+}
+
+// Periodic-image difference dx = x1 - x2 (adopted from src/gmxlib/pbc.c).
+// Only the diagonal elements of box are used, each multiplied by NM_TO_BOHR;
+// every component is shifted by whole box edges into [-edge/2, edge/2].
+static inline void pbc_dx_dftb(matrix box, const dvec x1, const dvec x2, dvec dx)
+{
+    int d;
+
+    for (d = 0; d < DIM; d++) {
+        const double edge = (double) box[d][d] * NM_TO_BOHR;
+        const double half = edge / 2.;
+        double delta = x1[d] - x2[d];
+
+        while (delta > half)
+            delta -= edge;
+        while (delta < -half)
+            delta += edge;
+        dx[d] = delta;
+    }
+}
+
+// lapack routine(s)
+// dsygv: takes scalars by value, forwards them by address to the external dsygv_, returns its info.
+static long dsygv(long itype, char jobz, char uplo, long n, double *a, long lda,
+             double *b, long ldb, double *w, double *work, long lwork)
+{
+  extern void dsygv_(long *, char *, char *, long *, double *, long *, double *, long *, double *, double *, long *, long *);
+  long info = 0; // pre-cleared; NOTE(review): presumably matters if dsygv_ writes a narrower INTEGER than long - confirm
+  dsygv_(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work, &lwork, &info);
+  return info;
+}
+
+// dsygvd: like dsygv, but forwards to the external dsygvd_ with the extra iwork/liwork arguments.
+static long dsygvd(long itype, char jobz, char uplo, long n, double *a, long lda,
+             double *b, long ldb, double *w, double *work, long lwork, long *iwork, long liwork)
+{
+  extern void dsygvd_(long *, char *, char *, long *, double *, long *, double *, long *, double *, double *, long *, long *, long *, long *);
+  long info = 0; // pre-cleared; NOTE(review): presumably matters if dsygvd_ writes a narrower INTEGER than long - confirm
+  dsygvd_(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work, &lwork, iwork, &liwork, &info);
+  return info;
+}
+
+// Print the label s followed by the interval (end - start) in nanoseconds, as "%s %12lld\n".
+void print_time_difference(char *s, struct timespec start, struct timespec end)
+{
+  // both differences are formed in long long, then combined into nanoseconds
+  const long long sec_diff  = (long long) end.tv_sec - (long long) start.tv_sec;
+  const long long nsec_diff = (long long) (end.tv_nsec - start.tv_nsec);
+  const long long value = 1000000000ll * sec_diff + nsec_diff;
+
+  printf("%s %12lld\n", s, value);
+}
+
+/* NDIM = NORB !!! */
+
+double run_dftb1(dftb_t *dftb, rvec f[], rvec fshift[], t_commrec *cr, t_forcerec *fr, matrix box) // i - nucleobase to be calculated
+// int eglcao(int nn, double x[NNDIM][3], double *eel, int *miter, double qmat[NNDIM], int phase)
+{
+  const double scftol = 1.e-9;
+  const double almix = 0.2;
+  //const int maxiter = 70;
+  
+  int indj, indk, indj1, indk1;
+  double eel, ecoul, ecoul3, efermi, eelold, eext, erep, edisp, esave;
+  //
+  int i, j, k, l, m, n, li, lj, niter, xx, yy, zz;
+  //int nmaofo, ii, jj, kk, ll, jfo, jao;
+  //double r2;
+  double qtot, r;
+  // lapack
+  long ier; // ndim;
+  char c;
+
+  int nn, ne;
+  dvec *x, bond, dgr, dgr_cumul, xsave;
+  //real bondnorm, bondnorm2, invbondnorm;
+  double dbondnorm;
+  dftb_phase1_t dftb1;
+
+  // for PME
+  double charge_checksum, fscal;
+  matrix vir_pme;
+  real energy_pme[1];
+  const int flags_pme_pot_only = GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_ENER_VIR | GMX_PME_CALC_POT;
+  const int flags_pme_forces = GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_ENER_VIR | GMX_PME_CALC_F;
+  t_nblist QMMMlist = fr->QMMMlist;
+  energy_pme[0] = (real) 1.0;
+  // static int initialized_pme = 0;
+  // t_gmx_pme *pmedata;
+  t_pbc pbc;
+  //rvec x_pbc, xe_pbc, rbond;
+  int status;
+  static int mdstep = -1;
+
+  static double *qold_pme=NULL;
+
+  static struct timespec time_dftbstart, time_dftbstop, time_1, time_2, time_3, time_4, time_sccstart, time_sccstop;
+
+  clock_gettime(CLOCK_MONOTONIC, &time_dftbstart);
+  print_time_difference("MM+ETC   TIME:", time_dftbstop, time_dftbstart);
+
+  // printf("Phase 1, site %d\n", ibase+1);
+
+  dftb1 = dftb->phase1;
+  nn = dftb1.nn;
+  ne = dftb1.ne;
+  x = dftb1.x;
+  if (qold_pme == NULL)
+    snew(qold_pme, nn);
+
+  do_neighborlist_for_dftb(dftb, box);
+
+  /* LENGTHS MUST BE CONVERTED TO BOHR! (5.292d-11 m) */
+
+/*
+  // write out the coordinates
+  printf("%d\ntest coordinates\n", nn);
+  for (n=0; n<nn; n++) {
+    switch (dftb1.izp[n]) {case 0: c='O';break; case 1: c='H';break; case 2: c='C';break; case 3: c='N';break;} 
+    printf("%c %f %f %f\n", c, x[n][XX]*0.529177249, x[n][YY]*0.529177249, x[n][ZZ]*0.529177249);
+  }
+*/
+/*
+  // write out the external charges
+  for (n=0; n<ne; n++) {
+    printf("%f %f %f %f\n", dftb1.xe[n][XX]*0.529177249, dftb1.xe[n][YY]*0.529177249, dftb1.xe[n][ZZ]*0.529177249, dftb1.ze[n]);
+  }
+*/
+  
+  /* set the initial charges
+  for (i=0; i<nn; i++)
+    dftb1.qmat[i] = dftb->qzero1[dftb1.izp[i]];
+  THIS IS NOW DONE ONLY ONCE AT THE DFTB INITIALIZATION
+  AND NOT REPEATED ANY FURTHER!
+  --> POSSIBLE SPEEDUP!
+  */
+
+  // initial setup
+
+  //set_pbc(&pbc, epbcXYZ, box);
+
+  eel = 0.0;
+
+  // printf("  icycle iter niter   e(total)\n");
+  // printf("====================================\n");
+
+  // setup of charge-independent part of H and S
+  for (j=0; j<nn; j++)
+    for (k=0; k<=j; k++) {
+      slkmatrices(j, k, x, dftb1.au, dftb1.bu, dftb->lmax, dftb->dim1, dftb->dr1, dftb1.izp, dftb->skstab1, dftb->skhtab1, dftb->skself1);
+      for (n=0; n<dftb1.ind[k+1]-dftb1.ind[k]; n++)
+        for (m=0; m<dftb1.ind[j+1]-dftb1.ind[j]; m++) {
+          dftb1.hamil[dftb1.ind[j]+m][dftb1.ind[k]+n] = dftb1.au[m][n];
+          dftb1.hamil[dftb1.ind[k]+n][dftb1.ind[j]+m] = dftb1.au[m][n];      
+          dftb1.overl[dftb1.ind[j]+m][dftb1.ind[k]+n] = dftb1.bu[m][n];
+          dftb1.overl[dftb1.ind[k]+n][dftb1.ind[j]+m] = dftb1.bu[m][n];
+	}
+    }
+
+/*
+  printf("Hamiltonian matrix:\n");
+  for (j=0; j<6; j++) {
+    for (k=0; k<6; k++) printf("%10.6f", dftb1.hamil[j][k]);
+    printf("\n");
+  }
+  printf("Overlap matrix:\n");
+  for (j=0; j<6; j++) {
+    for (k=0; k<6; k++) printf("%10.6f", dftb1.overl[j][k]);
+    printf("\n");
+  }
+*/
+
+  // QM/MM PME preparation -- short-range QM--MM component (real-space)
+  // using fr->ewaldcoeff_q, which has the dimension of 1/distance
+  printf("EWALD-SR START\n");
+  for (j=0; j<nn; j++) { // do it for every QM atom
+    dftb1.pot4[j] = 0.;
+    // add SR potential only from MM atoms in the neighbor list!
+    for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+      l = dftb1.neighbor_pme[j][k];
+      pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+      dbondnorm = dnorm(bond);
+      if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+        printf("QM/MM PME QM--MM short range exploding for QM=%d, MM=%d. MM charge is %f\n", j+1, l+1, dftb1.ze[l]);
+      } else {
+        if (dbondnorm < fr->rcoulomb * NM_TO_BOHR) {
+          dftb1.pot4[j] += dftb1.ze[l] / dbondnorm * (double) gmx_erfc(fr->ewaldcoeff_q * (real) dbondnorm / NM_TO_BOHR);
+          ////printf("Ewald-SR for QM=%d, MM=%d: contrib = %f\n", j+1, l+1, dftb1.ze[l] / dnorm(bond) * gmx_erfc(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR));
+	  //printf("Ewald-SR for QM=%d, MM=%d: contrib %f\n", j+1, l+1, dbondnorm);
+        } else {
+	  //printf("Ewald-SR for QM=%d, MM=%d: ignored %f\n", j+1, l+1, dbondnorm);
+        }
+      }
+    }
+  }
+  printf("EWALD-SR END\n");
+  // end of PME preparation
+
+  // setup for SCC cycle
+
+  eelold = 1.e10;
+  for (i=0; i<nn; i++)
+    dftb1.qmold[i] = dftb1.qmat[i] = 0.;
+  /* pre-calculate gamma matrix and the derivative
+   * for all atom pairs! */
+  get_gammamat(nn, x, dftb1.izp, dftb->uhubb1, dftb->uhder1, dftb->zeta1,
+		  dftb1.izpxh, dftb1.gammamat, dftb1.gammader);
+
+  /*
+  printf("Gamma matrix:\n");
+  for (i=0; i<3; i++) {
+	  for (j=0; j<3; j++)
+		  printf("%10.6f", dftb1.gammamat[i][j]);
+	  printf("\n");
+  }
+  printf("\n");
+  printf("Gamma deriv:\n");
+  for (i=0; i<3; i++) {
+	  for (j=0; j<3; j++)
+		  printf("%10.6f", dftb1.gammader[i][j]);
+	  printf("\n");
+  }
+  printf("\n");
+  */
+
+  //printf("Number of electrons: dftb1.nel = %d\n", dftb1.nel);
+
+  clock_gettime(CLOCK_MONOTONIC, &time_sccstart);
+  print_time_difference("DFTB PRE TIME:", time_dftbstart, time_sccstart);
+
+  /* SCC cycle starts here */
+  for (niter=0; niter<MAXITER_BROYDEN; niter++) {
+    clock_gettime(CLOCK_MONOTONIC, &time_4);
+    print_time_difference("SCC ITER TIME:", time_3, time_4);
+    time_3 = time_4;
+    /* save old charges */
+    for (i=0; i<nn; i++)
+      dftb1.qmold[i] = dftb1.qmat[i];
+    
+    /*
+    printf("qmat - start SCF:\n");
+    for (i=0; i<nn; i++)
+      printf("%3d %f\n", i+1, dftb1.qmat[i]);
+    */
+
+    /* charge-independent part of H and S */
+    for (j=0; j<nn; j++) {
+      indj = dftb1.ind[j];
+      indj1 = dftb1.ind[j+1];
+      for (k=0; k<nn; k++) {
+        indk = dftb1.ind[k]; 
+        indk1 = dftb1.ind[k+1];
+        for (n=0; n<indk1-indk; n++)
+          for (m=0; m<indj1-indj; m++) {
+            dftb1.a[indj+m][indk+n] = dftb1.hamil[indj+m][indk+n];
+            dftb1.b[indj+m][indk+n] = dftb1.overl[indj+m][indk+n];
+          }
+      }
+    }
+
+    /* calculate the effect of environment with PME
+     * including periodic images of QM charges
+     */
+    if ((dftb->partial_pme == 0) || (niter == 0)) {
+
+      // FULL PME ALWAYS IN THE FIRST ITERATION
+      for (j=0; j<nn; j++) {
+            dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+            dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+            dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+            dftb1.q_pme[j]    = (real) (-dftb1.qmat[j] + dftb->qzero1[dftb1.izp[j]]) * fr->qr->mm->scalefactor;
+            //printf("Charge %d = %f\n", j, (-dftb1.qmat[j] + dftb->qzero1[dftb1.izp[j]]));
+      }
+      for (j=0; j<ne; j++) {
+            dftb1.x_pme[nn + j][0] = (real) dftb1.xe[j][0] / NM_TO_BOHR;
+            dftb1.x_pme[nn + j][1] = (real) dftb1.xe[j][1] / NM_TO_BOHR;
+            dftb1.x_pme[nn + j][2] = (real) dftb1.xe[j][2] / NM_TO_BOHR;
+            dftb1.q_pme[nn + j]    = (real) dftb1.ze[j];
+      }
+      
+      for (j=0; j<nn; j++)
+        dftb1.pot[j] = 0.0;
+      
+      init_nrnb(dftb1.nrnb_pme);
+      clock_gettime(CLOCK_MONOTONIC, &time_1);
+      gmx_pme_do_dftb(fr->pmedata, 0, nn+ne, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, box, cr, 0, 0,
+            	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_pot_only, dftb1.pot);
+      clock_gettime(CLOCK_MONOTONIC, &time_2);
+      print_time_difference("DFTB PME TIME:", time_1, time_2);
+     
+      for (j=0; j<nn; j++) {
+        dftb1.pot[j] *= KJMOL_TO_HARTREE;
+        qold_pme[j] = dftb1.qmat[j];
+        dftb1.pot6[j] = 0;
+      }
+    } else {
+    // LIMITED PME IN EVERY FOLLOWING ITERATION
+      for (j=0; j<nn; j++) {
+            dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+            dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+            dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+            dftb1.q_pme[j]    = (real) (-dftb1.qmat[j] + qold_pme[j]) * fr->qr->mm->scalefactor;
+      }
+
+      clock_gettime(CLOCK_MONOTONIC, &time_1);
+      init_nrnb(dftb1.nrnb_pme);
+      gmx_pme_do_dftb(fr->pmedata, 0, nn, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, box, cr, 0, 0,
+            	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_pot_only, dftb1.pot6);
+      clock_gettime(CLOCK_MONOTONIC, &time_2);
+      print_time_difference("DFTB PML TIME:", time_1, time_2);
+
+      for (j=0; j<nn; j++)
+        dftb1.pot6[j] *= KJMOL_TO_HARTREE;
+    }
+
+    // PME -- corrections
+    for (j=0; j<nn; j++) {
+      // exclude the QM-QM interactions as the shift will be calculated in DFTB for these interactions
+      dftb1.pot2[j] = 0.;
+      for (k=0; k<nn; k++)
+        if (j != k) {
+          dvec_sub(dftb1.x[j], dftb1.x[k], bond);
+          dftb1.pot2[j] -= QM_CHARGE(k) * gmx_erf(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR) / dnorm(bond);
+        }
+      dftb1.pot2[j] *= fr->qr->mm->scalefactor;
+      // the "on-site" contribution to energy
+      dftb1.pot3[j] = - 2. * fr->ewaldcoeff_q / NM_TO_BOHR * QM_CHARGE(j) / sqrt(M_PI) * fr->qr->mm->scalefactor;
+    }
+
+    // charge-dependent (modified) Klopman--Ohno correction
+    for (j=0; j<nn; j++)
+      dftb1.pot5[j] = 0.;
+    if (dftb->cdko)
+      //cdkopotential(dftb, &QMMMlist, indexMM);
+      cdkopotential(dftb, box);
+
+    // save the calculated ESP as the external shift
+    for (j=0; j<nn; j++) {
+      dftb1.shiftE[j] = dftb1.pot[j] + dftb1.pot2[j] + dftb1.pot3[j] + dftb1.pot4[j] + dftb1.pot5[j] + dftb1.pot6[j];
+        // * fr->qr->mm->scalefactor; -- DO NOT DO THIS,
+        //                               BECAUSE THIS HAS BEEN DONE IN THE QM/MM INTERFACE
+      //printf("SHIFTE ATOM %3d: %12.7f\n", j+1, dftb1.shiftE[j]);
+      //printf("POT %3d %12.7f %12.7f %12.7f %12.7f\n", j+1, dftb1.pot[j], dftb1.pot2[j], dftb1.pot3[j], dftb1.pot4[j]);
+    }
+    // end PME
+
+    // calculate atomic hamilton shift (= sum over gamma*charge)
+    for (i=0; i<nn; i++) {
+      dftb1.shift[i]   = - dftb1.shiftE[i];
+      dftb1.shift3[i]  = 0.0;
+      dftb1.shift3a[i] = 0.0;
+      for (j=0; j<nn; j++) {
+        // dftb1.shift[i] += - QM_CHARGE(j) * (i>j ? dftb1.gammamat[i][j] : dftb1.gammamat[j][i]);
+        dftb1.shift[i] += - QM_CHARGE(j) * dftb1.gammamat[i][j];
+        if (dftb->sccmode == 3) {
+	  dftb1.shift3[i]  +=   - QM_CHARGE(j)  * dftb1.gammader[i][j];
+	  dftb1.shift3a[i] += SQR(QM_CHARGE(j)) * dftb1.gammader[j][i];
+	}
+      }
+      if (dftb->sccmode == 3)
+	dftb1.shift3[i] *= - QM_CHARGE(i);
+    }
+    /* void hamilshift(int nn, double *qmat, double *qzero, int *izp, double *qdiff, double **gammamat, double **gammader,
+		int sccmode, double *shift, double *shift3, double *shift3a) */
+
+/*
+    printf("Qmat    ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.qmat[i]);
+    printf("\n");
+    printf("Charges ");
+    for (i=0; i<10; i++) printf("%12.6f", QM_CHARGE(i));
+    printf("\n");
+    printf("ShiftE  ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.shiftE[i]);
+    printf("\n");
+    printf("Shift   ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.shift[i]);
+    printf("\n");
+    printf("Shift3  ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.shift3[i]);
+    printf("\n");
+    printf("Shift3A ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.shift3a[i]);
+    printf("\n");
+    printf("\n");
+*/
+/*
+    printf("Shift   ");
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shift[i]);
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shift3[i]);
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shift3a[i]);
+    printf("\n");
+    printf("ShiftE  ");
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shiftE[i]);
+    printf("\n");
+    printf("Shift+ShiftE");
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shiftE[i]+dftb1.shift[i]);
+    printf("\n");
+*/
+
+    /* update the Hamilton matrix
+     * shift3 and shift3a == 0 if sccmode != 3
+     */
+    for (i=0; i<nn; i++)
+      for (li=0; li < SQR(dftb->lmax[dftb1.izp[i]]); li++)
+        for (j=0; j<=i; j++)
+          for (lj=0; lj < SQR(dftb->lmax[dftb1.izp[j]]); lj++) 
+            dftb1.a[dftb1.ind[i]+li][dftb1.ind[j]+lj] += 0.5 * dftb1.overl[dftb1.ind[i]+li][dftb1.ind[j]+lj] * 
+		    (dftb1.shift[i] + dftb1.shift[j] /* sccmode == 2 */
+		     + (2.*dftb1.shift3[i] + dftb1.shift3a[i] + 2.*dftb1.shift3[j] + dftb1.shift3a[j]) / 3.); /* sccmode == 3 */
+    
+    // transpose the arrays a and b
+    for (j=0; j<dftb1.ndim; j++)
+      for (i=0; i<dftb1.ndim; i++) {
+        dftb1.a_trans[j*dftb1.ndim+i] = dftb1.a[i][j];
+        dftb1.b_trans[j*dftb1.ndim+i] = dftb1.b[i][j];
+      }
+
+    // print out the array a
+    /*
+    printf("A before dsygv\n");
+    for (i=0; i<10; i++) {
+      for (j=0; j<10; j++) printf ("%9.5f", dftb1.a_trans[i*dftb1.ndim+j]);
+      printf("\n");
+    }
+    */
+
+    ier = -512;
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    // ier = dsygv(1, 'V', 'L', dftb1.ndim, dftb1.a_trans, dftb1.ndim, dftb1.b_trans, dftb1.ndim, dftb1.ev, dftb1.aux, 3*dftb1.ndim);
+    ier = dsygvd(1, 'V', 'L', dftb1.ndim, dftb1.a_trans, dftb1.ndim, dftb1.b_trans, dftb1.ndim, dftb1.ev,
+                 dftb1.aux, 1 + 6*dftb1.ndim + 2*SQR(dftb1.ndim), dftb1.iaux, 3 + 5*dftb1.ndim);
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB DIA TIME:", time_1, time_2);
+    if ((int) ier) {
+      printf("\nDSYGVD: ier = %d\nEXITING!\n\n", (int) ier);
+      exit(-1);
+    }
+    for (j=0; j<dftb1.ndim; j++)
+      for (i=0; i<dftb1.ndim; i++)
+        dftb1.a[i][j] = dftb1.a_trans[j*dftb1.ndim+i];
+
+    /*
+    printf("\n");
+    printf("ier = %d\n", (int) ier);
+    printf("\n");
+    */
+
+    // print out the array a
+    /*
+    printf("A after dsygv\n");
+    for (i=0; i<10; i++) {
+      for (j=0; j<10; j++) printf ("%9.5f", dftb1.a[i][j]);
+      printf("\n");
+    }
+    */
+
+    // calculate occupation (occ) and Fermi energy (efermi),
+    fermi(dftb1.ndim, dftb1.ev, dftb1.occ, &efermi, dftb1.nel, dftb1.telec);
+    // for (i=0; i<dftb1.ndim; i++)
+    //   printf("%d: %f %f\n", i+1, dftb1.ev[i], dftb1.occ[i]);
+
+    // sum of occupied eigenvalues
+    eel = 0.0;
+    for (i=0; i<dftb1.ndim && dftb1.occ[i] > dftb->dacc; i++)
+      eel += dftb1.occ[i] * dftb1.ev[i];
+
+    // determine Mulliken charges, charge of the whole system and the mulliken
+    mulliken(nn, dftb1.qmat, dftb1.qmulli, &qtot, dftb1.ndim, dftb1.occ, dftb1.a, dftb1.overl, dftb1.ind, dftb->lmax, dftb1.izp);
+    
+/*
+    charge_checksum = 0.;
+    for (i=0; i<nn; i++)
+      charge_checksum += QM_CHARGE(i);
+    printf("sum of qmat after mulliken = %f\n", charge_checksum);
+*/
+    /*
+    printf("qmat - after mulliken:\n");
+    for (i=0; i<nn; i++)
+      printf("%3d %f\n", i+1, dftb1.qmat[i]);
+    */
+
+    // complete calculation of electronic energy
+    // charge-dependent contribution
+    // warning: this will only lead to the right result if convergence has been reached
+    ecoul = ecoul3 = eext = 0.0;
+
+    // note, if CDKO calculation is being done:
+    //   charges have changed, affecting the QM-MM interactions,
+    //   therefore these have to be recalculated now!
+    // charge-dependent (modified) Klopman--Ohno correction
+    if (dftb->cdko) {
+      for (j=0; j<nn; j++)
+        dftb1.pot5[j] = 0.;
+      //cdkopotential(dftb, &QMMMlist, indexMM);
+      cdkopotential(dftb, box);
+      for (j=0; j<nn; j++)
+        dftb1.shiftE[j] = dftb1.pot[j] + dftb1.pot5[j];
+    }
+
+    for (i=0; i<nn; i++) {
+      ecoul  += dftb1.shift[i] * (dftb1.qmat[i] + dftb->qzero1[dftb1.izp[i]]);
+      ecoul3 += dftb1.shift3[i] * (dftb1.qmat[i] + dftb->qzero1[dftb1.izp[i]]) + dftb1.shift3a[i] * dftb1.qmat[i];
+      eext   += dftb1.shiftE[i]  * QM_CHARGE(i);
+      if (dftb->cdko)
+        eext += 2. * (dftb1.shiftE[i] + dftb1.shiftE2[i]) * dftb1.qmat[i]; // the factor of 2 because eext will be divided by 2 below
+    }
+    eel += - ecoul/2. - ecoul3/3. + eext/2.;
+    // remark: eel contains shiftE already via ev,
+    // shift also contains -shiftE, i.e. ecoul also
+    // contains contributions from EXT
+
+    // print energy
+    //printf("iter: %d, E= %14.9f\n", niter, eel);
+
+    // check convergence
+    if (fabs(eel-eelold) < scftol)
+      break;
+    eelold = eel;
+
+    // Broyden mixing
+    broyden(niter, almix, nn, dftb1.qmold, dftb1.qmat, dftb->broyden);
+    for (i=0; i<nn; i++)
+      dftb1.qmat[i] = dftb1.qmold[i];
+
+/*
+    charge_checksum = 0.;
+    for (i=0; i<nn; i++)
+      charge_checksum += QM_CHARGE(i);
+    printf("sum of qmat after broyden = %f\n", charge_checksum);
+*/
+    //printf("qmat - after Broyden:\n");
+    //for (i=0; i<(nn<=10?nn:10); i++)
+      //printf(" %8.5f", dftb1.qmat[i]);
+      //printf("%3d %f\n", i+1, dftb1.qmat[i]);
+      //printf("\n");
+
+  } // end SCC cycle
+/*
+  printf("qmat - after SCC:\n");
+  for (i=0; i<(nn<=10?nn:10); i++)
+    printf(" %8.5f", dftb1.qmat[i]);
+  printf("\n");
+*/
+/*
+  printf("Shift   ");
+  for (i=0; i<3; i++) printf(" %8.5f", dftb1.shift[i]);
+  for (i=0; i<3; i++) printf(" %8.5f", dftb1.shift3[i]);
+  for (i=0; i<3; i++) printf(" %8.5f", dftb1.shift3a[i]);
+  for (i=0; i<3; i++) printf(" %8.5f", dftb1.shiftE[i]);
+  printf("\n");
+*/
+
+/*
+  printf("SHIFTE ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.shiftE[j]);
+  printf("\n");
+  printf("POT  ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot[j]);
+  printf("\n");
+  printf("POT2 ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot2[j]);
+  printf("\n");
+  printf("POT3 ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot3[j]);
+  printf("\n");
+  printf("POT4 ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot4[j]);
+  printf("\n");
+*/
+
+  printf("final eigenvalues:");
+  for (i=dftb1.nel/2-2; i<dftb1.nel/2; i++)
+    printf(" %8.5f (%5.3f)", dftb1.ev[i], dftb1.occ[i]);
+  printf(" FERMI");
+  for (i=dftb1.nel/2; i<dftb1.nel/2+2; i++)
+    printf(" %8.5f (%5.3f)", dftb1.ev[i], dftb1.occ[i]);
+  printf("\n");
+
+  /* debug */
+  printf ("niter = %d, eel = %12.6f, ecoul = %12.6f, ecoul3 = %12.6f, eext = %12.6f,\n", niter, eel, ecoul, ecoul3, eext);
+  //printf ("niter = %d, eel = %12.6f, ecoul = %12.6f, ecoul3 = %12.6f, eext = %12.6f, ", niter, eel, ecoul, ecoul3, eext);
+
+  clock_gettime(CLOCK_MONOTONIC, &time_sccstop);
+
+  // write out the eigenvalues
+
+/*
+  for (i=0; i<dftb1.ndim; i++)
+    printf("%2d %f\n", i+1, dftb1.ev[i]);
+*/
+  // outspec(nn, dftb1.ndim, dftb1.ind, dftb1.ev, dftb1.occ, efermi, dftb1.qmat, dftb1.qmulli, dftb, dftb1);
+  // outeigenvectors(dftb1.a, dftb1.ev, dftb1.ind, nn, dftb1);
+
+  // printf("%5d %5d / %2d %14.6f\n", 1, 1, niter, eel);
+  // printf("\n***** end of dftb *****\n");
+
+  /* CONTINUE HERE WITH REPULSION AND FORCES! */
+
+  // calculate atomic hamilton shift (= sum over gamma*charge)
+    for (i=0; i<nn; i++) {
+      dftb1.shift[i]   = - dftb1.shiftE[i];
+      dftb1.shift3[i]  = 0.0;
+      dftb1.shift3a[i] = 0.0;
+      for (j=0; j<nn; j++) {
+        // dftb1.shift[i] += - QM_CHARGE(j) * (i>j ? dftb1.gammamat[i][j] : dftb1.gammamat[j][i]);
+        dftb1.shift[i] += - QM_CHARGE(j) * dftb1.gammamat[i][j];
+        if (dftb->sccmode == 3) {
+	  dftb1.shift3[i]  +=   - QM_CHARGE(j)  * dftb1.gammader[i][j];
+	  dftb1.shift3a[i] += SQR(QM_CHARGE(j)) * dftb1.gammader[j][i];
+	}
+      }
+      if (dftb->sccmode == 3)
+	dftb1.shift3[i] *= - QM_CHARGE(i);
+    }
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.grad[i]);
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.partgrad[i]);
+  usual_gradient(dftb, x, dftb1.partgrad);
+/*
+  printf("gradient components from usual_gradient\n");
+  for (i=0; i<nn; i++)
+    printf("%5d%12.8f%12.8f%12.8f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  printf("end gradient components from usual_gradient\n");
+*/
+  for (i=0; i<nn; i++)
+    copy_dvec(dftb1.partgrad[i], dftb1.grad[i]);
+  printf("GRAD1A");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.partgrad[i]);
+  gamma_gradient(dftb, x, dftb1.partgrad);
+/*
+  printf("gradient components from gamma_gradient\n");
+  for (i=0; i<nn; i++)
+    printf("%5d%12.8f%12.8f%12.8f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  printf("end gradient components from gamma_gradient\n");
+*/
+  for (i=0; i<nn; i++)
+    dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+  printf("GRAD1B");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+
+  /* externalchgrad */ /* DO IT HERE WITH PME SOMEHOW - BOTH QM AND MM! */
+
+  /*
+  printf("TEST\n");
+  printf("rlist = %f\n", fr->rlist);
+  printf("rcoulomb = %f\n", fr->rcoulomb);
+  printf("rcoulomb_switch = %f\n", fr->rcoulomb_switch);
+  printf("ewaldcoeff_q = %f\n", fr->ewaldcoeff_q);
+  printf("contrib at cutoff = %f\n", gmx_erfc(fr->ewaldcoeff_q * fr->rcoulomb) / fr->rcoulomb);
+  printf("contrib at 0.7 nm = %f\n", gmx_erfc(fr->ewaldcoeff_q * 0.7) / 0.7);
+  printf("contrib at 0.5 nm = %f\n", gmx_erfc(fr->ewaldcoeff_q * 0.5) / 0.5);
+  printf("END TEST\n");
+  */
+
+  for (j=0; j<nn; j++)
+    clear_dvec(dftb1.partgrad[j]);
+  for (j=0; j<ne; j++)
+    clear_dvec(dftb1.mmgrad[j]);
+
+  /* QM gradient */
+  // the coordinates and charges of QM atoms
+  for (j=0; j<nn; j++) {
+        dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+        dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+        dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+        dftb1.q_pme[j]    = (real) QM_CHARGE(j) * fr->qr->mm->scalefactor;
+  }
+  // the coordinates and the charges of MM atoms
+  for (j=0; j<ne; j++) {
+        dftb1.x_pme[nn + j][0] = (real) dftb1.xe[j][0] / NM_TO_BOHR;
+        dftb1.x_pme[nn + j][1] = (real) dftb1.xe[j][1] / NM_TO_BOHR;
+        dftb1.x_pme[nn + j][2] = (real) dftb1.xe[j][2] / NM_TO_BOHR;
+        dftb1.q_pme[nn + j]    = (real) dftb1.ze[j];
+  }
+  // PME -- long-range component
+  init_nrnb(dftb1.nrnb_pme);
+  clock_gettime(CLOCK_MONOTONIC, &time_1);
+  gmx_pme_do_dftb(fr->pmedata, 0, nn+ne, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, box, cr, 0, 0,
+      	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_forces, dftb1.pot);
+  clock_gettime(CLOCK_MONOTONIC, &time_2);
+  print_time_difference("DFTB F-1 TIME:", time_1, time_2);
+  for (j=0; j<nn; j++)
+    for (m=0; m<DIM; m++)
+      dftb1.partgrad[j][m] = - dftb1.f_pme[j][m] / HARTREE_BOHR2MD; // partgrad is gradient, i.e. the negative of force
+  printf("GRAD1C");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+  /*
+  printf("gradient components to QM - LR\n");
+  for (i=0; i<nn; i++)
+    printf("%3d%12.7f%12.7f%12.7f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  */
+  //printf("PME corrections - checkpoint 1\n");
+  // PME corrections -- exclude QM--QM interaction
+  //printf("gradient QM/MM correction - QM--QM exclusions\n");
+  for (j=0; j<nn; j++) {
+    // exclude the QM--QM interactions -- gradient of contribution to potential dftb1.pot2[]
+    // note that the gradient of contribution to potential dftb1.pot3[] vanishes!
+    for (k=0; k<j; k++) {
+      dvec_sub(dftb1.x[j], dftb1.x[k], bond);
+      // negative of gradient -- we want to subtract it from partgrad
+      fscal = QM_CHARGE(j) * QM_CHARGE(k) / SQR(dnorm(bond)) * fr->qr->mm->scalefactor *
+             (gmx_erf(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR) / dnorm(bond)
+              - M_2_SQRTPI * fr->ewaldcoeff_q / NM_TO_BOHR * exp(-SQR(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR)));
+      dsvmul(fscal, bond, dgr); // vec(dgr) = fscal * vec(bond)
+      dvec_inc(dftb1.partgrad[j], dgr);
+      dvec_dec(dftb1.partgrad[k], dgr);
+      //printf("%1d-%1d:%12.7f%12.7f%12.7f\n", j+1, k+1, dgr[XX], dgr[YY], dgr[ZZ]);
+    }
+  }
+  printf("GRAD1D");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+  //printf("PME corrections - checkpoint 2\n");
+
+  /* MM gradient -- LR component to QM/MM */
+  // the coordinates and charges of QM atoms
+  for (j=0; j<nn; j++) {
+        dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+        dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+        dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+        dftb1.q_pme[j]    = (real) QM_CHARGE(j) * fr->qr->mm->scalefactor;
+  }
+  // the coordinates and the charges of MM atoms
+  for (j=0; j<ne; j++) {
+        dftb1.x_pme[nn + j][0] = (real) dftb1.xe[j][0] / NM_TO_BOHR;
+        dftb1.x_pme[nn + j][1] = (real) dftb1.xe[j][1] / NM_TO_BOHR;
+        dftb1.x_pme[nn + j][2] = (real) dftb1.xe[j][2] / NM_TO_BOHR;
+        dftb1.q_pme[nn + j]    = 0.; // ASK GERRIT IF THIS IS REALLY NOT INCLUDED IN GROMACS MM CALCULATIONS!
+  }
+  /*
+  charge_checksum = 0.;
+  for (j=0; j<nn; j++)
+    charge_checksum += dftb1.q_pme[j];
+  printf("  PME charge checksum QM = %f\n", charge_checksum);
+  charge_checksum = 0.;
+  for (j=nn; j<nn+ne; j++)
+    charge_checksum += dftb1.q_pme[j];
+  printf("  PME charge checksum MM = %f\n", charge_checksum);
+  */
+  //PME
+  init_nrnb(dftb1.nrnb_pme);
+  clock_gettime(CLOCK_MONOTONIC, &time_1);
+  gmx_pme_do_dftb_mm_forces(fr->pmedata, 0, nn+ne, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, box, cr, 0, 0,
+      	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_forces);
+  clock_gettime(CLOCK_MONOTONIC, &time_2);
+  print_time_difference("DFTB F-2 TIME:", time_1, time_2);
+  /*
+  printf("MM FORCES EWALD START\n");
+  for (j=0; j<ne; j++)
+    printf("%5d %9.6f %9.6f %9.6f\n", j+1, dftb1.f_pme[nn + j][XX], dftb1.f_pme[nn + j][YY], dftb1.f_pme[nn + j][ZZ]);
+  printf("MM FORCES EWALD STOP\n");
+  */
+  for (j=0; j<ne; j++) {
+    dftb1.mmgrad[j][XX] = (double) (-dftb1.ze[j] / HARTREE_BOHR2MD * dftb1.f_pme[nn + j][XX]);
+    dftb1.mmgrad[j][YY] = (double) (-dftb1.ze[j] / HARTREE_BOHR2MD * dftb1.f_pme[nn + j][YY]);
+    dftb1.mmgrad[j][ZZ] = (double) (-dftb1.ze[j] / HARTREE_BOHR2MD * dftb1.f_pme[nn + j][ZZ]);
+  } // dsvmul(-dftb1.ze[j] / HARTREE_BOHR2MD, dftb1.f_pme[nn + j], dftb1.mmgrad[j]);
+  //printf("PME corrections - checkpoint 3\n");
+
+  //printf("gradient components to MM - LR\n");
+  //for (i=0; i<10; i++)
+  //  if (dnorm(dftb1.mmgrad[i]) > 0.00001)
+  //    printf("%3d%12.7f%12.7f%12.7f\n", i+1, dftb1.mmgrad[i][XX], dftb1.mmgrad[i][YY], dftb1.mmgrad[i][ZZ]);
+
+  for (j=0; j<nn; j++) { // do it for every QM atom
+    // add SR potential only from MM atoms in the neighbor list!
+    for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+      l = dftb1.neighbor_pme[j][k];
+      pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+      dbondnorm = dnorm(bond);
+      if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+        printf("QM/MM PME QM--MM short range exploding for QM=%d, MM=%d. MM charge is %f\n", j+1, l+1, dftb1.ze[l]);
+      } else {
+        if (dbondnorm < fr->rcoulomb * NM_TO_BOHR) {
+          fscal = QM_CHARGE(j) * dftb1.ze[l] / SQR(dbondnorm) * fr->qr->mm->scalefactor *
+                 (- (double) gmx_erfc(fr->ewaldcoeff_q * dbondnorm / NM_TO_BOHR) / dbondnorm
+                  - M_2_SQRTPI * (double) fr->ewaldcoeff_q / NM_TO_BOHR * exp(-SQR((double) fr->ewaldcoeff_q * dbondnorm / NM_TO_BOHR)));
+          dsvmul(fscal, bond, dgr);
+          //printf("SR: QM %1d -- MM %1d:%12.7f%12.7f%12.7f\n", j+1, k+1, dgr[XX], dgr[YY], dgr[ZZ]);
+          // short-range QM/MM contribution to QM gradient
+          dvec_inc(dftb1.partgrad[j], dgr);
+          // short-range QM/MM contribution to MM gradient
+          dvec_dec(dftb1.mmgrad[l], dgr);
+        }
+      }
+    }
+  }
+  printf("GRAD1E");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+  //printf("\nPME corrections - checkpoint 4\n");
+
+  /*
+  printf("gradient components to MM - complete\n");
+  for (k=0; k<ne; k++)
+    if (dnorm(dftb1.mmgrad[k]) > 0.00001)
+      printf("%3d%12.7f%12.7f%12.7f\n", k+1, dftb1.mmgrad[k][XX], dftb1.mmgrad[k][YY], dftb1.mmgrad[k][ZZ]);
+  */
+  /*
+  printf("gradient components to QM - complete\n");
+  for (i=0; i<nn; i++)
+    printf("%3d%12.7f%12.7f%12.7f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  */
+
+  // end PME
+
+/*
+  printf("TEST - CUT-OFF QM/MM GRADIENTS ON MM ATOMS!\n");
+  for (k=0; k<dftb1.ne; k++) {
+    clear_dvec(dgr_cumul);
+    for (j=0; j<nn; j++) {
+      dvec_sub(dftb1.x[j], dftb1.xe[k], bond);
+      fscal = - (double) QM_CHARGE(j) * dftb1.ze[k] / CUB(dnorm(bond));
+      dsvmul(fscal, bond, dgr);
+      dvec_dec(dgr_cumul, dgr);
+    }
+    if (dnorm2(dgr_cumul) > 0.0000001)
+      printf("%3d%12.7f%12.7f%12.7f\n", k+1, dgr_cumul[XX], dgr_cumul[YY], dgr_cumul[ZZ]);
+  }
+  printf("TEST - CUT-OFF QM/MM GRADIENTS ON QM ATOMS!\n");
+  for (j=0; j<nn; j++) {
+    clear_dvec(dgr_cumul);
+    for (k=0; k<dftb1.ne; k++) {
+      dvec_sub(dftb1.x[j], dftb1.xe[k], bond);
+      fscal = - (double) QM_CHARGE(j) * dftb1.ze[k] / CUB(dnorm(bond));
+      dsvmul(fscal, bond, dgr);
+      dvec_inc(dgr_cumul, dgr);
+    }
+    if (dnorm2(dgr_cumul) > 0.0000001)
+      printf("%3d%12.7f%12.7f%12.7f\n", j+1, dgr_cumul[XX], dgr_cumul[YY], dgr_cumul[ZZ]);
+  }
+*/
+
+  // end PME
+  
+/*
+  // debug output
+  printf("gradient components from externalchgrad\n");
+  for (i=0; i<nn; i++)
+    printf("%3d%12.7f%12.7f%12.7f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  printf("end gradient components from externalchgrad\n");
+*/
+
+  for (i=0; i<nn; i++)
+    dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.partgrad[i]);
+  erep = repulsive(dftb, x, dftb1.partgrad);
+  printf("GRAD1F");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+/*
+  printf("gradient components from repulsive\n");
+  for (i=0; i<nn; i++)
+    printf("%5d%12.8f%12.8f%12.8f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  printf("end gradient components from repulsive\n");
+*/
+  for (i=0; i<nn; i++)
+    dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+
+  // ADD THE DISPERSION INTERACTION (IF DESIRED)
+  if (dftb->dispersion) {
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    for (i=0; i<nn; i++)
+      clear_dvec(dftb1.partgrad[i]);
+    switch (dftb->dispersion) {
+      case 1: /* Grimme's DFT-D3 */
+              edisp = dispersion_dftd3(dftb, dftb1.partgrad);
+              break;
+      case 2: /* Elstner's 2001, not yet implemented, cannot happen... */
+              edisp = 0.;
+              break;
+    }
+    for (i=0; i<nn; i++)
+      dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB DIS TIME:", time_1, time_2);
+  } else {
+    edisp = 0.;
+  }
+  printf("GRAD1G");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+
+  // ADD THE CORRECTION DUE TO CHARGE-DEPENDENT KLOPMAN--OHNO INTERACTION (IF DESIRED)
+  if (dftb->cdko) {
+    // clear the arrays for the gradients
+    for (i=0; i<nn; i++)
+      clear_dvec(dftb1.partgrad[i]);
+    for (i=0; i<ne; i++)
+      clear_dvec(dftb1.partmmgrad[i]);
+    //cdkograd(dftb, &QMMMlist, indexMM, x, xe, partgrad, partmmgrad);
+    cdkograd(dftb, box, dftb1.partgrad, dftb1.partmmgrad);
+    for (i=0; i<nn; i++)
+      dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+    for (i=0; i<ne; i++)
+      dvec_inc(dftb1.mmgrad[i], dftb1.partmmgrad[i]);
+  }
+
+  printf ("erep = %16.10f      eel+erep = %16.10f     edisp = %16.10f\n", erep, eel+erep, edisp);
+
+  /* copy gradients to the corresponding arrays - copied from call_gaussian() */
+  //printf("begin forces\n");
+  printf("GRAD1");
+  for(i=0; i<nn; i++) {
+    printf(" %12.7f", dnorm(dftb1.grad[i]));
+    for(j=0; j<DIM; j++) {
+      f[i][j]      = (real) HARTREE_BOHR2MD * dftb1.grad[i][j];
+      fshift[i][j] = (real) HARTREE_BOHR2MD * dftb1.grad[i][j];
+    }
+  }
+  printf("\n");
+  //printf("  end forces\n");
+  printf("GRADMM1");
+  for(i=0; i<ne; i++) {
+    if (dnorm(dftb1.mmgrad[i]) > 0.002)
+      printf(" %12.7f", dnorm(dftb1.mmgrad[i]));
+    for(j=0; j<DIM; j++) {
+      f[i + nn][j]      = (real) HARTREE_BOHR2MD * dftb1.mmgrad[i][j];      
+      fshift[i + nn][j] = (real) HARTREE_BOHR2MD * dftb1.mmgrad[i][j];
+    }
+  }
+  printf("\n");
+
+  clock_gettime(CLOCK_MONOTONIC, &time_dftbstop);
+  print_time_difference("DFTB FRC TIME:", time_sccstop, time_dftbstop);
+  print_time_difference("DFTB     TIME:", time_dftbstart, time_dftbstop);
+
+  esave = eel + erep + edisp;
+
+  /* SWAP COORDINATES FOR THE OTHER CALCULATION */
+  copy_dvec(x[0], xsave);
+  copy_dvec(x[7], x[0]);
+  copy_dvec(xsave, x[7]);
+  copy_dvec(x[5], xsave);
+  copy_dvec(x[1], x[5]);
+  copy_dvec(xsave, x[1]);
+  copy_dvec(x[2], xsave);
+  copy_dvec(x[6], x[2]);
+  copy_dvec(xsave, x[6]);
+
+  /* RECALCULATE EVERYTHING */
+
+  do_neighborlist_for_dftb(dftb, box);
+
+  eel = 0.0;
+  for (j=0; j<nn; j++)
+    for (k=0; k<=j; k++) {
+      slkmatrices(j, k, x, dftb1.au, dftb1.bu, dftb->lmax, dftb->dim1, dftb->dr1, dftb1.izp, dftb->skstab1, dftb->skhtab1, dftb->skself1);
+      for (n=0; n<dftb1.ind[k+1]-dftb1.ind[k]; n++)
+        for (m=0; m<dftb1.ind[j+1]-dftb1.ind[j]; m++) {
+          dftb1.hamil[dftb1.ind[j]+m][dftb1.ind[k]+n] = dftb1.au[m][n];
+          dftb1.hamil[dftb1.ind[k]+n][dftb1.ind[j]+m] = dftb1.au[m][n];      
+          dftb1.overl[dftb1.ind[j]+m][dftb1.ind[k]+n] = dftb1.bu[m][n];
+          dftb1.overl[dftb1.ind[k]+n][dftb1.ind[j]+m] = dftb1.bu[m][n];
+	}
+    }
+
+  // QM/MM PME preparation -- short-range QM--MM component (real-space)
+  // using fr->ewaldcoeff_q, which has the dimension of 1/distance
+  for (j=0; j<nn; j++) { // do it for every QM atom
+    dftb1.pot4[j] = 0.;
+    // add SR potential only from MM atoms in the neighbor list!
+    for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+      l = dftb1.neighbor_pme[j][k];
+      pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+      dbondnorm = dnorm(bond);
+      if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+      } else {
+        if (dbondnorm < fr->rcoulomb * NM_TO_BOHR) {
+          dftb1.pot4[j] += dftb1.ze[l] / dbondnorm * (double) gmx_erfc(fr->ewaldcoeff_q * (real) dbondnorm / NM_TO_BOHR);
+        }
+      }
+    }
+  }
+
+  eelold = 1.e10;
+  for (i=0; i<nn; i++)
+    dftb1.qmold[i] = dftb1.qmat[i] = 0.;
+  /* pre-calculate gamma matrix and the derivative
+   * for all atom pairs! */
+  get_gammamat(nn, x, dftb1.izp, dftb->uhubb1, dftb->uhder1, dftb->zeta1,
+		  dftb1.izpxh, dftb1.gammamat, dftb1.gammader);
+
+  /* SCC cycle starts here */
+  for (niter=0; niter<MAXITER_BROYDEN; niter++) {
+    /* save old charges */
+    for (i=0; i<nn; i++)
+      dftb1.qmold[i] = dftb1.qmat[i];
+    
+    /* charge-independent part of H and S */
+    for (j=0; j<nn; j++) {
+      indj = dftb1.ind[j];
+      indj1 = dftb1.ind[j+1];
+      for (k=0; k<nn; k++) {
+        indk = dftb1.ind[k]; 
+        indk1 = dftb1.ind[k+1];
+        for (n=0; n<indk1-indk; n++)
+          for (m=0; m<indj1-indj; m++) {
+            dftb1.a[indj+m][indk+n] = dftb1.hamil[indj+m][indk+n];
+            dftb1.b[indj+m][indk+n] = dftb1.overl[indj+m][indk+n];
+          }
+      }
+    }
+
+    /* calculate the effect of environment with PME
+     * including periodic images of QM charges
+     */
+    if ((dftb->partial_pme == 0) || (niter == 0)) {
+
+      // FULL PME ALWAYS IN THE FIRST ITERATION
+      for (j=0; j<nn; j++) {
+            dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+            dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+            dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+            dftb1.q_pme[j]    = (real) (-dftb1.qmat[j] + dftb->qzero1[dftb1.izp[j]]) * fr->qr->mm->scalefactor;
+      }
+      for (j=0; j<ne; j++) {
+            dftb1.x_pme[nn + j][0] = (real) dftb1.xe[j][0] / NM_TO_BOHR;
+            dftb1.x_pme[nn + j][1] = (real) dftb1.xe[j][1] / NM_TO_BOHR;
+            dftb1.x_pme[nn + j][2] = (real) dftb1.xe[j][2] / NM_TO_BOHR;
+            dftb1.q_pme[nn + j]    = (real) dftb1.ze[j];
+      }
+      
+      for (j=0; j<nn; j++)
+        dftb1.pot[j] = 0.0;
+      
+      init_nrnb(dftb1.nrnb_pme);
+      gmx_pme_do_dftb(fr->pmedata, 0, nn+ne, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, box, cr, 0, 0,
+            	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_pot_only, dftb1.pot);
+     
+      for (j=0; j<nn; j++) {
+        dftb1.pot[j] *= KJMOL_TO_HARTREE;
+        qold_pme[j] = dftb1.qmat[j];
+        dftb1.pot6[j] = 0;
+      }
+    } else {
+    // LIMITED PME IN EVERY FOLLOWING ITERATION
+      for (j=0; j<nn; j++) {
+            dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+            dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+            dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+            dftb1.q_pme[j]    = (real) (-dftb1.qmat[j] + qold_pme[j]) * fr->qr->mm->scalefactor;
+      }
+
+      init_nrnb(dftb1.nrnb_pme);
+      gmx_pme_do_dftb(fr->pmedata, 0, nn, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, box, cr, 0, 0,
+            	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_pot_only, dftb1.pot6);
+
+      for (j=0; j<nn; j++)
+        dftb1.pot6[j] *= KJMOL_TO_HARTREE;
+    }
+
+    // PME -- corrections
+    for (j=0; j<nn; j++) {
+      // exclude the QM-QM interactions as the shift will be calculated in DFTB for these interactions
+      dftb1.pot2[j] = 0.;
+      for (k=0; k<nn; k++)
+        if (j != k) {
+          dvec_sub(dftb1.x[j], dftb1.x[k], bond);
+          dftb1.pot2[j] -= QM_CHARGE(k) * gmx_erf(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR) / dnorm(bond);
+        }
+      dftb1.pot2[j] *= fr->qr->mm->scalefactor;
+      // the "on-site" contribution to energy
+      dftb1.pot3[j] = - 2. * fr->ewaldcoeff_q / NM_TO_BOHR * QM_CHARGE(j) / sqrt(M_PI) * fr->qr->mm->scalefactor;
+    }
+
+    // charge-dependent (modified) Klopman--Ohno correction
+    for (j=0; j<nn; j++)
+      dftb1.pot5[j] = 0.;
+
+    // save the calculated ESP as the external shift
+    for (j=0; j<nn; j++) {
+      dftb1.shiftE[j] = dftb1.pot[j] + dftb1.pot2[j] + dftb1.pot3[j] + dftb1.pot4[j] + dftb1.pot5[j] + dftb1.pot6[j];
+    }
+
+    // calculate atomic hamilton shift (= sum over gamma*charge)
+    for (i=0; i<nn; i++) {
+      dftb1.shift[i]   = - dftb1.shiftE[i];
+      dftb1.shift3[i]  = 0.0;
+      dftb1.shift3a[i] = 0.0;
+      for (j=0; j<nn; j++) {
+        dftb1.shift[i] += - QM_CHARGE(j) * dftb1.gammamat[i][j];
+        if (dftb->sccmode == 3) {
+	  dftb1.shift3[i]  +=   - QM_CHARGE(j)  * dftb1.gammader[i][j];
+	  dftb1.shift3a[i] += SQR(QM_CHARGE(j)) * dftb1.gammader[j][i];
+	}
+      }
+      if (dftb->sccmode == 3)
+	dftb1.shift3[i] *= - QM_CHARGE(i);
+    }
+
+    /* update the Hamilton matrix */
+    for (i=0; i<nn; i++)
+      for (li=0; li < SQR(dftb->lmax[dftb1.izp[i]]); li++)
+        for (j=0; j<=i; j++)
+          for (lj=0; lj < SQR(dftb->lmax[dftb1.izp[j]]); lj++) 
+            dftb1.a[dftb1.ind[i]+li][dftb1.ind[j]+lj] += 0.5 * dftb1.overl[dftb1.ind[i]+li][dftb1.ind[j]+lj] * 
+		    (dftb1.shift[i] + dftb1.shift[j] /* sccmode == 2 */
+		     + (2.*dftb1.shift3[i] + dftb1.shift3a[i] + 2.*dftb1.shift3[j] + dftb1.shift3a[j]) / 3.); /* sccmode == 3 */
+    
+    // transpose the arrays a and b
+    for (j=0; j<dftb1.ndim; j++)
+      for (i=0; i<dftb1.ndim; i++) {
+        dftb1.a_trans[j*dftb1.ndim+i] = dftb1.a[i][j];
+        dftb1.b_trans[j*dftb1.ndim+i] = dftb1.b[i][j];
+      }
+
+    ier = -512;
+    ier = dsygvd(1, 'V', 'L', dftb1.ndim, dftb1.a_trans, dftb1.ndim, dftb1.b_trans, dftb1.ndim, dftb1.ev,
+                 dftb1.aux, 1 + 6*dftb1.ndim + 2*SQR(dftb1.ndim), dftb1.iaux, 3 + 5*dftb1.ndim);
+    if ((int) ier) {
+      printf("\nDSYGVD: ier = %d\nEXITING!\n\n", (int) ier);
+      exit(-1);
+    }
+    for (j=0; j<dftb1.ndim; j++)
+      for (i=0; i<dftb1.ndim; i++)
+        dftb1.a[i][j] = dftb1.a_trans[j*dftb1.ndim+i];
+
+    // calculate occupation (occ) and Fermi energy (efermi),
+    fermi(dftb1.ndim, dftb1.ev, dftb1.occ, &efermi, dftb1.nel, dftb1.telec);
+
+    // sum of occupied eigenvalues
+    eel = 0.0;
+    for (i=0; i<dftb1.ndim && dftb1.occ[i] > dftb->dacc; i++)
+      eel += dftb1.occ[i] * dftb1.ev[i];
+
+    // determine Mulliken charges, charge of the whole system and the mulliken
+    mulliken(nn, dftb1.qmat, dftb1.qmulli, &qtot, dftb1.ndim, dftb1.occ, dftb1.a, dftb1.overl, dftb1.ind, dftb->lmax, dftb1.izp);
+    
+    ecoul = ecoul3 = eext = 0.0;
+
+    for (i=0; i<nn; i++) {
+      ecoul  += dftb1.shift[i] * (dftb1.qmat[i] + dftb->qzero1[dftb1.izp[i]]);
+      ecoul3 += dftb1.shift3[i] * (dftb1.qmat[i] + dftb->qzero1[dftb1.izp[i]]) + dftb1.shift3a[i] * dftb1.qmat[i];
+      eext   += dftb1.shiftE[i]  * QM_CHARGE(i);
+    }
+    eel += - ecoul/2. - ecoul3/3. + eext/2.;
+
+    // check convergence
+    if (fabs(eel-eelold) < scftol)
+      break;
+    eelold = eel;
+
+    // Broyden mixing
+    broyden(niter, almix, nn, dftb1.qmold, dftb1.qmat, dftb->broyden);
+    for (i=0; i<nn; i++)
+      dftb1.qmat[i] = dftb1.qmold[i];
+
+  } // end SCC cycle
+
+
+  printf("final eigenvalues:");
+  for (i=dftb1.nel/2-2; i<dftb1.nel/2; i++)
+    printf(" %8.5f (%5.3f)", dftb1.ev[i], dftb1.occ[i]);
+  printf(" FERMI");
+  for (i=dftb1.nel/2; i<dftb1.nel/2+2; i++)
+    printf(" %8.5f (%5.3f)", dftb1.ev[i], dftb1.occ[i]);
+  printf("\n");
+
+  /* debug */
+  printf ("niter = %d, eel = %12.6f, ecoul = %12.6f, ecoul3 = %12.6f, eext = %12.6f,\n", niter, eel, ecoul, ecoul3, eext);
+
+  // calculate atomic hamilton shift (= sum over gamma*charge)
+    for (i=0; i<nn; i++) {
+      dftb1.shift[i]   = - dftb1.shiftE[i];
+      dftb1.shift3[i]  = 0.0;
+      dftb1.shift3a[i] = 0.0;
+      for (j=0; j<nn; j++) {
+        dftb1.shift[i] += - QM_CHARGE(j) * dftb1.gammamat[i][j];
+        if (dftb->sccmode == 3) {
+	  dftb1.shift3[i]  +=   - QM_CHARGE(j)  * dftb1.gammader[i][j];
+	  dftb1.shift3a[i] += SQR(QM_CHARGE(j)) * dftb1.gammader[j][i];
+	}
+      }
+      if (dftb->sccmode == 3)
+	dftb1.shift3[i] *= - QM_CHARGE(i);
+    }
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.grad[i]);
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.partgrad[i]);
+  usual_gradient(dftb, x, dftb1.partgrad);
+  for (i=0; i<nn; i++)
+    copy_dvec(dftb1.partgrad[i], dftb1.grad[i]);
+  printf("GRAD2A");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.partgrad[i]);
+  gamma_gradient(dftb, x, dftb1.partgrad);
+  for (i=0; i<nn; i++)
+    dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+  printf("GRAD2B");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+
+  for (j=0; j<nn; j++)
+    clear_dvec(dftb1.partgrad[j]);
+  for (j=0; j<ne; j++)
+    clear_dvec(dftb1.mmgrad[j]);
+
+  /* QM gradient */
+  // the coordinates and charges of QM atoms
+  for (j=0; j<nn; j++) {
+        dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+        dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+        dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+        dftb1.q_pme[j]    = (real) QM_CHARGE(j) * fr->qr->mm->scalefactor;
+  }
+  // the coordinates and the charges of MM atoms
+  for (j=0; j<ne; j++) {
+        dftb1.x_pme[nn + j][0] = (real) dftb1.xe[j][0] / NM_TO_BOHR;
+        dftb1.x_pme[nn + j][1] = (real) dftb1.xe[j][1] / NM_TO_BOHR;
+        dftb1.x_pme[nn + j][2] = (real) dftb1.xe[j][2] / NM_TO_BOHR;
+        dftb1.q_pme[nn + j]    = (real) dftb1.ze[j];
+  }
+  // PME -- long-range component
+  init_nrnb(dftb1.nrnb_pme);
+  gmx_pme_do_dftb(fr->pmedata, 0, nn+ne, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, box, cr, 0, 0,
+      	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_forces, dftb1.pot);
+  for (j=0; j<nn; j++)
+    for (m=0; m<DIM; m++)
+      dftb1.partgrad[j][m] = - dftb1.f_pme[j][m] / HARTREE_BOHR2MD; // partgrad is gradient, i.e. the negative of force
+  printf("GRAD2C");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+
+  for (j=0; j<nn; j++) {
+    for (k=0; k<j; k++) {
+      dvec_sub(dftb1.x[j], dftb1.x[k], bond);
+      // negative of gradient -- we want to subtract it from partgrad
+      fscal = QM_CHARGE(j) * QM_CHARGE(k) / SQR(dnorm(bond)) * fr->qr->mm->scalefactor *
+             (gmx_erf(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR) / dnorm(bond)
+              - M_2_SQRTPI * fr->ewaldcoeff_q / NM_TO_BOHR * exp(-SQR(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR)));
+      dsvmul(fscal, bond, dgr); // vec(dgr) = fscal * vec(bond)
+      dvec_inc(dftb1.partgrad[j], dgr);
+      dvec_dec(dftb1.partgrad[k], dgr);
+    }
+  }
+  printf("GRAD2D");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+
+  /* MM gradient -- LR component to QM/MM */
+  // the coordinates and charges of QM atoms
+  for (j=0; j<nn; j++) {
+        dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+        dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+        dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+        dftb1.q_pme[j]    = (real) QM_CHARGE(j) * fr->qr->mm->scalefactor;
+  }
+  // the coordinates and the charges of MM atoms
+  for (j=0; j<ne; j++) {
+        dftb1.x_pme[nn + j][0] = (real) dftb1.xe[j][0] / NM_TO_BOHR;
+        dftb1.x_pme[nn + j][1] = (real) dftb1.xe[j][1] / NM_TO_BOHR;
+        dftb1.x_pme[nn + j][2] = (real) dftb1.xe[j][2] / NM_TO_BOHR;
+        dftb1.q_pme[nn + j]    = 0.; // ASK GERRIT IF THIS IS REALLY NOT INCLUDED IN GROMACS MM CALCULATIONS!
+  }
+  init_nrnb(dftb1.nrnb_pme);
+  gmx_pme_do_dftb_mm_forces(fr->pmedata, 0, nn+ne, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, box, cr, 0, 0,
+      	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_forces);
+  for (j=0; j<ne; j++) {
+    dftb1.mmgrad[j][XX] = (double) (-dftb1.ze[j] / HARTREE_BOHR2MD * dftb1.f_pme[nn + j][XX]);
+    dftb1.mmgrad[j][YY] = (double) (-dftb1.ze[j] / HARTREE_BOHR2MD * dftb1.f_pme[nn + j][YY]);
+    dftb1.mmgrad[j][ZZ] = (double) (-dftb1.ze[j] / HARTREE_BOHR2MD * dftb1.f_pme[nn + j][ZZ]);
+  }
+
+  for (j=0; j<nn; j++) { // do it for every QM atom
+    // add SR potential only from MM atoms in the neighbor list!
+    for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+      l = dftb1.neighbor_pme[j][k];
+      pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+      dbondnorm = dnorm(bond);
+      if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+      } else {
+        if (dbondnorm < fr->rcoulomb * NM_TO_BOHR) {
+          fscal = QM_CHARGE(j) * dftb1.ze[l] / SQR(dbondnorm) * fr->qr->mm->scalefactor *
+                 (- (double) gmx_erfc(fr->ewaldcoeff_q * dbondnorm / NM_TO_BOHR) / dbondnorm
+                  - M_2_SQRTPI * (double) fr->ewaldcoeff_q / NM_TO_BOHR * exp(-SQR((double) fr->ewaldcoeff_q * dbondnorm / NM_TO_BOHR)));
+          dsvmul(fscal, bond, dgr);
+          dvec_inc(dftb1.partgrad[j], dgr);
+          dvec_dec(dftb1.mmgrad[l], dgr);
+        }
+      }
+    }
+  }
+  for (i=0; i<nn; i++)
+    dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+  printf("GRAD2E");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.partgrad[i]);
+  erep = repulsive(dftb, x, dftb1.partgrad);
+  for (i=0; i<nn; i++)
+    dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+  printf("GRAD2F");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+
+  // ADD THE DISPERSION INTERACTION (IF DESIRED)
+  if (dftb->dispersion) {
+    for (i=0; i<nn; i++)
+      clear_dvec(dftb1.partgrad[i]);
+    switch (dftb->dispersion) {
+      case 1: /* Grimme's DFT-D3 */
+              edisp = dispersion_dftd3(dftb, dftb1.partgrad);
+              break;
+      case 2: /* Elstner's 2001, not yet implemented, cannot happen... */
+              edisp = 0.;
+              break;
+    }
+    for (i=0; i<nn; i++)
+      dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+  } else {
+    edisp = 0.;
+  }
+  printf("GRAD2G");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.partgrad[i]));
+  printf("\n");
+
+  printf ("erep = %16.10f      eel+erep = %16.10f     edisp = %16.10f\n", erep, eel+erep, edisp);
+
+  printf("GRAD2");
+  for(i=0; i<nn; i++)
+    printf(" %12.7f", dnorm(dftb1.grad[i]));
+  printf("\n");
+  printf("GRADMM2");
+  for (i=0; i<ne; i++)
+    if (dnorm(dftb1.mmgrad[i]) > 0.002)
+      printf(" %12.7f", dnorm(dftb1.mmgrad[i]));
+  printf("\n");
+
+  return HARTREE2KJ * AVOGADRO * esave; // (eel + erep + edisp);
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_eglcao.c.stable gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_eglcao.c.stable
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_eglcao.c.stable	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_eglcao.c.stable	2014-09-02 21:41:25.000000000 +0200
@@ -0,0 +1,989 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+#include<time.h>
+//#include"charge_transfer.h"
+#include"qm_dftb.h"
+
+/*      PROGRAM DYLCAO */
+/*     ================ */
+
+/*     Copyright 1991 by Peter Blaudeck, Dirk Porezag */
+
+/* ********************************************************************* */
+
+/*     PROGRAM CHARACTERISTICS */
+/*     ----------------------- */
+
+/* DYLCAO calculates the dynamics of various systems */
+/* within a two-centre SETB formalism */
+
+/* ********************************************************************* */
+/*
+
+     PHASE 1 FOR CHARGE TRANSFER -- CALC OF FRAGMENTS
+
+*/
+/* ********************************************************************* */
+
+
+/*     SUBROUTINE EGLCAO */
+/*     ================= */
+
+/*     Copyright 1997 by Peter Blaudeck, Dirk Porezag, Michael Haugk, */
+/*                       Joachim Elsner */
+/*     Bo Song for CMD-QM/MM based on Fragments, April 2007 */
+
+/* ********************************************************************* */
+
+/*     PROGRAM CHARACTERISTICS */
+/*     ----------------------- */
+
+/* eglcao calculates energy and gradient for dylcao as shown by Seifert. */
+/* The determination of the occupation numbers has been changed to be */
+/* also valid for metallic systems. */
+
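+/* PARAMETERS (of the legacy eglcao() interface; run_dftb1() below takes different arguments): */
+/* nn      i  number of atoms */
+/* x       r  coordinates (n3) */
+/* eel     r  electronic energy */
+/* miter   i  number of scf-iterations performed */
+/* qmat    r  atomic charges (updated by the Mulliken analysis) */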
+
+/* ********************************************************************* */
+
+// In-place component-wise subtraction a -= b for double-precision vectors.
+// (Defined locally because it is missing in include/vec.h somehow...)
+static gmx_inline void dvec_dec(dvec a,const dvec b)
+{
+  const double diff_x = a[XX] - b[XX];
+  const double diff_y = a[YY] - b[YY];
+  const double diff_z = a[ZZ] - b[ZZ];
+
+  // all three differences are formed before a is overwritten
+  a[XX] = diff_x;
+  a[YY] = diff_y;
+  a[ZZ] = diff_z;
+}
+
+// adopted from src/gmxlib/pbc.c: dx = x1 - x2, each component wrapped into [-L/2, L/2]
+// with L = box[i][i] * NM_TO_BOHR; off-diagonal box elements are ignored
+static gmx_inline void pbc_dx_dftb(matrix box, const dvec x1, const dvec x2, dvec dx)
+{
+    int i;
+    double length;
+
+    for(i=0; i<DIM; i++) {
+        dx[i] = x1[i] - x2[i];
+        length = (double) box[i][i] * NM_TO_BOHR;
+        if (length <= 0.) continue; // no usable period: the loops below would never terminate
+        while (dx[i] > length / 2.) {
+            dx[i] -= length;
+        }
+        while (dx[i] < - length / 2.) {
+            dx[i] += length;
+        }
+    }
+}
+
+// lapack routine(s) -- thin wrappers taking scalars by value and returning LAPACK's INFO (0 = success)
+static long dsygv(long itype, char jobz, char uplo, long n, double *a, long lda,
+             double *b, long ldb, double *w, double *work, long lwork)
+{
+  extern void dsygv_(long *, char *, char *, long *, double *, long *, double *,
+                long *, double *, double *, long *, long *);
+  long info = 0; // pre-cleared: a LAPACK whose INTEGER is narrower than long would set only part of it
+  dsygv_(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work, &lwork, &info);
+  return info;
+}
+
+// By-value wrapper for LAPACK DSYGVD (dsygv variant with an extra integer workspace); returns INFO.
+static long dsygvd(long itype, char jobz, char uplo, long n, double *a, long lda,
+             double *b, long ldb, double *w, double *work, long lwork, long *iwork, long liwork)
+{
+  extern void dsygvd_(long *, char *, char *, long *, double *, long *, double *, long *, double *, double *, long *, long *, long *, long *);
+  long info = 0; // pre-cleared: a LAPACK whose INTEGER is narrower than long would set only part of it
+  dsygvd_(&itype, &jobz, &uplo, &n, a, &lda, b, &ldb, w, work, &lwork, iwork, &liwork, &info);
+  return info;
+}
+
+// Print the label s followed by the elapsed time (end - start) in nanoseconds to stdout.
+void print_time_difference(char *s, struct timespec start, struct timespec end)
+{
+  // whole seconds scaled to ns, plus the (possibly negative) nanosecond remainder
+  const long long elapsed_sec  = (long long) end.tv_sec - (long long) start.tv_sec;
+  const long long elapsed_nsec = (long long) (end.tv_nsec - start.tv_nsec);
+  const long long value = 1000000000ll * elapsed_sec + elapsed_nsec;
+
+  printf("%s %12lld\n", s, value);
+}
+
+/* NDIM = NORB !!! */
+
+double run_dftb1(dftb_t *dftb, rvec f[], rvec fshift[], t_commrec *cr, t_forcerec *fr, matrix box) // i - nucleobase to be calculated
+// int eglcao(int nn, double x[NNDIM][3], double *eel, int *miter, double qmat[NNDIM], int phase)
+{
+  const double scftol = 1.e-9;
+  const double almix = 0.2;
+  //const int maxiter = 70;
+  
+  int indj, indk, indj1, indk1;
+  double eel, ecoul, ecoul3, efermi, eelold, eext, erep, edisp;
+  //
+  int i, j, k, l, m, n, li, lj, niter, xx, yy, zz;
+  //int nmaofo, ii, jj, kk, ll, jfo, jao;
+  //double r2;
+  double qtot, r;
+  // lapack
+  long ier; // ndim;
+  char c;
+
+  int nn, ne;
+  dvec *x, bond, dgr, dgr_cumul;
+  //real bondnorm, bondnorm2, invbondnorm;
+  double dbondnorm;
+  dftb_phase1_t dftb1;
+
+  // for PME
+  double charge_checksum, fscal;
+  matrix vir_pme;
+  real energy_pme[1];
+  const int flags_pme_pot_only = GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_ENER_VIR | GMX_PME_CALC_POT;
+  const int flags_pme_forces = GMX_PME_SPREAD | GMX_PME_SOLVE | GMX_PME_CALC_ENER_VIR | GMX_PME_CALC_F;
+  t_nblist QMMMlist = fr->QMMMlist;
+  energy_pme[0] = (real) 1.0;
+  // static int initialized_pme = 0;
+  // t_gmx_pme *pmedata;
+  t_pbc pbc;
+  //rvec x_pbc, xe_pbc, rbond;
+  int status;
+  static int mdstep = -1;
+
+  static struct timespec time_dftbstart, time_dftbstop, time_1, time_2, time_3, time_4, time_sccstart, time_sccstop;
+
+  clock_gettime(CLOCK_MONOTONIC, &time_dftbstart);
+  print_time_difference("MM+ETC   TIME:", time_dftbstop, time_dftbstart);
+
+  // printf("Phase 1, site %d\n", ibase+1);
+
+  dftb1 = dftb->phase1;
+  nn = dftb1.nn;
+  ne = dftb1.ne;
+  x = dftb1.x;
+
+  // check if the manual QM/MM neighborsearching shall be done
+  mdstep++;
+  if (mdstep - dftb->lastlist_pme >= dftb->nstlist_pme) {
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    do_neighborlist_for_dftb(dftb, box);
+    dftb->lastlist_pme = mdstep;
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB NS  TIME:", time_1, time_2);
+  }
+
+  /* LENGTHS MUST BE CONVERTED TO BOHR! (5.292d-11 m) */
+
+/*
+  // write out the coordinates
+  printf("%d\ntest coordinates\n", nn);
+  for (n=0; n<nn; n++) {
+    switch (dftb1.izp[n]) {case 0: c='O';break; case 1: c='H';break; case 2: c='C';break; case 3: c='N';break;} 
+    printf("%c %f %f %f\n", c, x[n][XX]*0.529177249, x[n][YY]*0.529177249, x[n][ZZ]*0.529177249);
+  }
+*/
+/*
+  // write out the external charges
+  for (n=0; n<ne; n++) {
+    printf("%f %f %f %f\n", dftb1.xe[n][XX]*0.529177249, dftb1.xe[n][YY]*0.529177249, dftb1.xe[n][ZZ]*0.529177249, dftb1.ze[n]);
+  }
+*/
+  
+  /* set the initial charges
+  for (i=0; i<nn; i++)
+    dftb1.qmat[i] = dftb->qzero1[dftb1.izp[i]];
+  THIS IS NOW DONE ONLY ONCE AT THE DFTB INITIALIZATION
+  AND NOT REPEATED ANY FURTHER!
+  --> POSSIBLE SPEEDUP!
+  */
+
+  // initial setup
+
+  //set_pbc(&pbc, epbcXYZ, box);
+
+  eel = 0.0;
+
+  // printf("  icycle iter niter   e(total)\n");
+  // printf("====================================\n");
+
+  // setup of charge-independent part of H and S
+  for (j=0; j<nn; j++)
+    for (k=0; k<=j; k++) {
+      slkmatrices(j, k, x, dftb1.au, dftb1.bu, dftb->lmax, dftb->dim1, dftb->dr1, dftb1.izp, dftb->skstab1, dftb->skhtab1, dftb->skself1);
+      for (n=0; n<dftb1.ind[k+1]-dftb1.ind[k]; n++)
+        for (m=0; m<dftb1.ind[j+1]-dftb1.ind[j]; m++) {
+          dftb1.hamil[dftb1.ind[j]+m][dftb1.ind[k]+n] = dftb1.au[m][n];
+          dftb1.hamil[dftb1.ind[k]+n][dftb1.ind[j]+m] = dftb1.au[m][n];      
+          dftb1.overl[dftb1.ind[j]+m][dftb1.ind[k]+n] = dftb1.bu[m][n];
+          dftb1.overl[dftb1.ind[k]+n][dftb1.ind[j]+m] = dftb1.bu[m][n];
+	}
+    }
+
+/*
+  printf("Hamiltonian matrix:\n");
+  for (j=0; j<6; j++) {
+    for (k=0; k<6; k++) printf("%10.6f", dftb1.hamil[j][k]);
+    printf("\n");
+  }
+  printf("Overlap matrix:\n");
+  for (j=0; j<6; j++) {
+    for (k=0; k<6; k++) printf("%10.6f", dftb1.overl[j][k]);
+    printf("\n");
+  }
+*/
+
+  // QM/MM PME preparation -- short-range QM--MM component (real-space)
+  // using fr->ewaldcoeff_q, which has the dimension of 1/distance
+  printf("EWALD-SR START\n");
+  for (j=0; j<nn; j++) { // do it for every QM atom
+    dftb1.pot4[j] = 0.;
+    // add SR potential only from MM atoms in the neighbor list!
+    for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+      l = dftb1.neighbor_pme[j][k];
+      pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+      dbondnorm = dnorm(bond);
+      if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+        printf("QM/MM PME QM--MM short range exploding for QM=%d, MM=%d. MM charge is %f\n", j+1, l+1, dftb1.ze[l]);
+      } else {
+        if (dbondnorm < fr->rcoulomb * NM_TO_BOHR) {
+          dftb1.pot4[j] += dftb1.ze[l] / dbondnorm * (double) gmx_erfc(fr->ewaldcoeff_q * (real) dbondnorm / NM_TO_BOHR);
+          ////printf("Ewald-SR for QM=%d, MM=%d: contrib = %f\n", j+1, l+1, dftb1.ze[l] / dnorm(bond) * gmx_erfc(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR));
+	  //printf("Ewald-SR for QM=%d, MM=%d: contrib %f\n", j+1, l+1, dbondnorm);
+        } else {
+	  //printf("Ewald-SR for QM=%d, MM=%d: ignored %f\n", j+1, l+1, dbondnorm);
+        }
+      }
+    }
+  }
+  printf("EWALD-SR END\n");
+  // end of PME preparation
+
+  // setup for SCC cycle
+
+  eelold = 1.e10;
+  /* pre-calculate gamma matrix and the derivative
+   * for all atom pairs! */
+  get_gammamat(nn, x, dftb1.izp, dftb->uhubb1, dftb->uhder1, dftb->zeta1,
+		  dftb1.izpxh, dftb1.gammamat, dftb1.gammader);
+
+  /*
+  printf("Gamma matrix:\n");
+  for (i=0; i<3; i++) {
+	  for (j=0; j<3; j++)
+		  printf("%10.6f", dftb1.gammamat[i][j]);
+	  printf("\n");
+  }
+  printf("\n");
+  printf("Gamma deriv:\n");
+  for (i=0; i<3; i++) {
+	  for (j=0; j<3; j++)
+		  printf("%10.6f", dftb1.gammader[i][j]);
+	  printf("\n");
+  }
+  printf("\n");
+  */
+
+  //printf("Number of electrons: dftb1.nel = %d\n", dftb1.nel);
+
+  clock_gettime(CLOCK_MONOTONIC, &time_sccstart);
+  print_time_difference("DFTB PRE TIME:", time_dftbstart, time_sccstart);
+
+  /* SCC cycle starts here */
+  for (niter=0; niter<MAXITER_BROYDEN; niter++) {
+    clock_gettime(CLOCK_MONOTONIC, &time_4);
+    print_time_difference("SCC ITER TIME:", time_3, time_4);
+    time_3 = time_4;
+    /* save old charges */
+    for (i=0; i<nn; i++)
+      dftb1.qmold[i] = dftb1.qmat[i];
+    
+    /*
+    printf("qmat - start SCF:\n");
+    for (i=0; i<nn; i++)
+      printf("%3d %f\n", i+1, dftb1.qmat[i]);
+    */
+
+    /* charge-independent part of H and S */
+    for (j=0; j<nn; j++) {
+      indj = dftb1.ind[j];
+      indj1 = dftb1.ind[j+1];
+      for (k=0; k<nn; k++) {
+        indk = dftb1.ind[k]; 
+        indk1 = dftb1.ind[k+1];
+        for (n=0; n<indk1-indk; n++)
+          for (m=0; m<indj1-indj; m++) {
+            dftb1.a[indj+m][indk+n] = dftb1.hamil[indj+m][indk+n];
+            dftb1.b[indj+m][indk+n] = dftb1.overl[indj+m][indk+n];
+          }
+      }
+    }
+
+    // calculate the effect of environment with PME
+    // including periodic images of QM charges
+    //printf("\n PME within SCC \n\n");
+    for (j=0; j<nn; j++) {
+          dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+          dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+          dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+  	  dftb1.q_pme[j]    = (real) (-dftb1.qmat[j] + dftb->qzero1[dftb1.izp[j]]);
+          //printf("Charge %d = %f\n", j, (-dftb1.qmat[j] + dftb->qzero1[dftb1.izp[j]]));
+    }
+    // the coordinates and the charges of MM atoms
+    for (j=0; j<ne; j++) {
+          dftb1.x_pme[nn + j][0] = (real) dftb1.xe[j][0] / NM_TO_BOHR;
+          dftb1.x_pme[nn + j][1] = (real) dftb1.xe[j][1] / NM_TO_BOHR;
+          dftb1.x_pme[nn + j][2] = (real) dftb1.xe[j][2] / NM_TO_BOHR;
+          dftb1.q_pme[nn + j]    = (real) dftb1.ze[j];
+    }
+    /*
+    file = fopen("test_pme_coords.xvg", "w");
+    for (j=0; j<nn + ne; j++)
+      fprintf(file, "%5d %12.7f %12.7f %12.7f\n", j+1, dftb1.x_pme[j][0], dftb1.x_pme[j][1], dftb1.x_pme[j][2]);
+    fclose(file);
+    */
+
+    for (j=0; j<nn; j++)
+      dftb1.pot[j] = 0.0;
+    // PME -- long-range component
+    init_nrnb(dftb1.nrnb_pme);
+    /*
+    charge_checksum = 0.;
+    for (j=0; j<nn; j++)
+      charge_checksum += dftb1.q_pme[j];
+    printf("  PME charge checksum QM = %f\n", charge_checksum);
+    charge_checksum = 0.;
+    for (j=nn; j<nn+ne; j++)
+      charge_checksum += dftb1.q_pme[j];
+    printf("  PME charge checksum MM = %f\n", charge_checksum);
+    printf("DEBUG_PME: %d %f %f %f %f %d\n", nn+ne, box[0][0], box[1][1], box[2][2], fr->ewaldcoeff_q, flags_pme_pot_only);
+    for (j=0; j<10; j++)
+      printf("QM %2d = %12.7f %12.7f %12.7f\n", j, dftb1.x_pme[j][0], dftb1.x_pme[j][1], dftb1.x_pme[j][2]);
+    for (j=0; j<10; j++)
+      printf("MM %2d = %12.7f %12.7f %12.7f\n", j, dftb1.x_pme[nn+j][0], dftb1.x_pme[nn+j][1], dftb1.x_pme[nn+j][2]);
+    */
+
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    gmx_pme_do_dftb(fr->pmedata, 0, nn+ne, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, dftb1.q_pme, box, cr, 0, 0,
+  		  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_pot_only, dftb1.pot);
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB PME TIME:", time_1, time_2);
+    //printf("POT  ");
+    //for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot[j]);
+    //printf("\n");
+
+    for (j=0; j<nn; j++)
+      dftb1.pot[j] *= KJMOL_TO_HARTREE;
+
+    // PME -- corrections
+    for (j=0; j<nn; j++) {
+      // exclude the QM-QM interactions as the shift will be calculated in DFTB for these interactions
+      dftb1.pot2[j] = 0.;
+      for (k=0; k<nn; k++)
+        if (j != k) {
+          dvec_sub(dftb1.x[j], dftb1.x[k], bond);
+          dftb1.pot2[j] -= QM_CHARGE(k) * gmx_erf(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR) / dnorm(bond);
+        }
+      // the "on-site" contribution to energy
+      dftb1.pot3[j] = - 2. * fr->ewaldcoeff_q / NM_TO_BOHR * QM_CHARGE(j) / sqrt(M_PI);
+    }
+
+    // charge-dependent (modified) Klopman--Ohno correction
+    for (j=0; j<nn; j++)
+      dftb1.pot5[j] = 0.;
+    if (dftb->cdko)
+      //cdkopotential(dftb, &QMMMlist, indexMM);
+      cdkopotential(dftb, box);
+
+    // save the calculated ESP as the external shift
+    for (j=0; j<nn; j++) {
+      dftb1.shiftE[j] = dftb1.pot[j] + dftb1.pot2[j] + dftb1.pot3[j] + dftb1.pot4[j] + dftb1.pot5[j];
+        // * fr->qr->mm->scalefactor; -- DO NOT DO THIS,
+        //                               BECAUSE THIS HAS BEEN DONE IN THE QM/MM INTERFACE
+      //printf("SHIFTE ATOM %3d: %12.7f\n", j+1, dftb1.shiftE[j]);
+      //printf("POT %3d %12.7f %12.7f %12.7f %12.7f\n", j+1, dftb1.pot[j], dftb1.pot2[j], dftb1.pot3[j], dftb1.pot4[j]);
+    }
+    // end PME
+
+    // calculate atomic hamilton shift (= sum over gamma*charge)
+    for (i=0; i<nn; i++) {
+      dftb1.shift[i]   = - dftb1.shiftE[i];
+      dftb1.shift3[i]  = 0.0;
+      dftb1.shift3a[i] = 0.0;
+      for (j=0; j<nn; j++) {
+        // dftb1.shift[i] += - QM_CHARGE(j) * (i>j ? dftb1.gammamat[i][j] : dftb1.gammamat[j][i]);
+        dftb1.shift[i] += - QM_CHARGE(j) * dftb1.gammamat[i][j];
+        if (dftb->sccmode == 3) {
+	  dftb1.shift3[i]  +=   - QM_CHARGE(j)  * dftb1.gammader[i][j];
+	  dftb1.shift3a[i] += SQR(QM_CHARGE(j)) * dftb1.gammader[j][i];
+	}
+      }
+      if (dftb->sccmode == 3)
+	dftb1.shift3[i] *= - QM_CHARGE(i);
+    }
+    /* void hamilshift(int nn, double *qmat, double *qzero, int *izp, double *qdiff, double **gammamat, double **gammader,
+		int sccmode, double *shift, double *shift3, double *shift3a) */
+
+/*
+    printf("Qmat    ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.qmat[i]);
+    printf("\n");
+    printf("Charges ");
+    for (i=0; i<10; i++) printf("%12.6f", QM_CHARGE(i));
+    printf("\n");
+    printf("ShiftE  ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.shiftE[i]);
+    printf("\n");
+    printf("Shift   ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.shift[i]);
+    printf("\n");
+    printf("Shift3  ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.shift3[i]);
+    printf("\n");
+    printf("Shift3A ");
+    for (i=0; i<10; i++) printf("%12.6f", dftb1.shift3a[i]);
+    printf("\n");
+    printf("\n");
+*/
+/*
+    printf("Shift   ");
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shift[i]);
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shift3[i]);
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shift3a[i]);
+    printf("\n");
+    printf("ShiftE  ");
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shiftE[i]);
+    printf("\n");
+    printf("Shift+ShiftE");
+    for (i=0; i<3; i++) printf("%8.5f", dftb1.shiftE[i]+dftb1.shift[i]);
+    printf("\n");
+*/
+
+    /* update the Hamilton matrix
+     * shift3 and shift3a == 0 if sccmode != 3
+     */
+    for (i=0; i<nn; i++)
+      for (li=0; li < SQR(dftb->lmax[dftb1.izp[i]]); li++)
+        for (j=0; j<=i; j++)
+          for (lj=0; lj < SQR(dftb->lmax[dftb1.izp[j]]); lj++) 
+            dftb1.a[dftb1.ind[i]+li][dftb1.ind[j]+lj] += 0.5 * dftb1.overl[dftb1.ind[i]+li][dftb1.ind[j]+lj] * 
+		    (dftb1.shift[i] + dftb1.shift[j] /* sccmode == 2 */
+		     + (2.*dftb1.shift3[i] + dftb1.shift3a[i] + 2.*dftb1.shift3[j] + dftb1.shift3a[j]) / 3.); /* sccmode == 3 */
+    
+    // transpose the arrays a and b
+    for (j=0; j<dftb1.ndim; j++)
+      for (i=0; i<dftb1.ndim; i++) {
+        dftb1.a_trans[j*dftb1.ndim+i] = dftb1.a[i][j];
+        dftb1.b_trans[j*dftb1.ndim+i] = dftb1.b[i][j];
+      }
+
+    // print out the array a
+    /*
+    printf("A before dsygv\n");
+    for (i=0; i<10; i++) {
+      for (j=0; j<10; j++) printf ("%9.5f", dftb1.a_trans[i*dftb1.ndim+j]);
+      printf("\n");
+    }
+    */
+
+    ier = -512;
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    // ier = dsygv(1, 'V', 'L', dftb1.ndim, dftb1.a_trans, dftb1.ndim, dftb1.b_trans, dftb1.ndim, dftb1.ev, dftb1.aux, 3*dftb1.ndim);
+    ier = dsygvd(1, 'V', 'L', dftb1.ndim, dftb1.a_trans, dftb1.ndim, dftb1.b_trans, dftb1.ndim, dftb1.ev,
+                 dftb1.aux, 1 + 6*dftb1.ndim + 2*SQR(dftb1.ndim), dftb1.iaux, 3 + 5*dftb1.ndim);
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB DIA TIME:", time_1, time_2);
+    if ((int) ier) {
+      printf("\nDSYGVD: ier = %d\nEXITING!\n\n", (int) ier);
+      exit(-1);
+    }
+    for (j=0; j<dftb1.ndim; j++)
+      for (i=0; i<dftb1.ndim; i++)
+        dftb1.a[i][j] = dftb1.a_trans[j*dftb1.ndim+i];
+
+    /*
+    printf("\n");
+    printf("ier = %d\n", (int) ier);
+    printf("\n");
+    */
+
+    // print out the array a
+    /*
+    printf("A after dsygv\n");
+    for (i=0; i<10; i++) {
+      for (j=0; j<10; j++) printf ("%9.5f", dftb1.a[i][j]);
+      printf("\n");
+    }
+    */
+
+    // calculate occupation (occ) and Fermi energy (efermi),
+    fermi(dftb1.ndim, dftb1.ev, dftb1.occ, &efermi, dftb1.nel, dftb1.telec);
+    // for (i=0; i<dftb1.ndim; i++)
+    //   printf("%d: %f %f\n", i+1, dftb1.ev[i], dftb1.occ[i]);
+
+    // sum of occupied eigenvalues
+    eel = 0.0;
+    for (i=0; i<dftb1.ndim && dftb1.occ[i] > dftb->dacc; i++)
+      eel += dftb1.occ[i] * dftb1.ev[i];
+
+    // determine Mulliken charges, charge of the whole system and the mulliken
+    mulliken(nn, dftb1.qmat, dftb1.qmulli, &qtot, dftb1.ndim, dftb->dacc, dftb1.occ, dftb1.a, dftb1.overl, dftb1.ind, dftb->lmax, dftb1.izp);
+    
+/*
+    charge_checksum = 0.;
+    for (i=0; i<nn; i++)
+      charge_checksum += QM_CHARGE(i);
+    printf("sum of qmat after mulliken = %f\n", charge_checksum);
+*/
+    /*
+    printf("qmat - after mulliken:\n");
+    for (i=0; i<nn; i++)
+      printf("%3d %f\n", i+1, dftb1.qmat[i]);
+    */
+
+    // complete calculation of electronic energy
+    // charge-dependent contribution
+    // warning: this will only lead to the right result if convergence has been reached
+    ecoul = ecoul3 = eext = 0.0;
+
+    // note, if CDKO calculation is beind done:
+    //   charges have changed, affecting the QM-MM interactions,
+    //   therefore these have to be recalculated now!
+    // charge-dependent (modified) Klopman--Ohno correction
+    if (dftb->cdko) {
+      for (j=0; j<nn; j++)
+        dftb1.pot5[j] = 0.;
+      //cdkopotential(dftb, &QMMMlist, indexMM);
+      cdkopotential(dftb, box);
+      for (j=0; j<nn; j++)
+        dftb1.shiftE[j] = dftb1.pot[j] + dftb1.pot5[j];
+    }
+
+    for (i=0; i<nn; i++) {
+      ecoul  += dftb1.shift[i] * (dftb1.qmat[i] + dftb->qzero1[dftb1.izp[i]]);
+      ecoul3 += dftb1.shift3[i] * (dftb1.qmat[i] + dftb->qzero1[dftb1.izp[i]]) + dftb1.shift3a[i] * dftb1.qmat[i];
+      eext   += dftb1.shiftE[i]  * QM_CHARGE(i);
+      if (dftb->cdko)
+        eext += 2. * (dftb1.shiftE[i] + dftb1.shiftE2[i]) * dftb1.qmat[i]; // the factor of 2 because eext will be divided by 2 below
+    }
+    eel += - ecoul/2. - ecoul3/3. + eext/2.;
+    // remark: eel containts shiftE already via ev,
+    // shift also contains -shiftE, i.e. ecoul also
+    // contains contributions from EXT
+
+    // print energy
+    //printf("iter: %d, E= %14.9f\n", niter, eel);
+
+    // check convergence
+    if (fabs(eel-eelold) < scftol)
+      break;
+    eelold = eel;
+
+    // Broyden mixing
+    broyden(niter, almix, nn, dftb1.qmold, dftb1.qmat, dftb->broyden);
+    for (i=0; i<nn; i++)
+      dftb1.qmat[i] = dftb1.qmold[i];
+
+/*
+    charge_checksum = 0.;
+    for (i=0; i<nn; i++)
+      charge_checksum += QM_CHARGE(i);
+    printf("sum of qmat after broyden = %f\n", charge_checksum);
+*/
+    //printf("qmat - after Broyden:\n");
+    //for (i=0; i<(nn<=10?nn:10); i++)
+      //printf(" %8.5f", dftb1.qmat[i]);
+      //printf("%3d %f\n", i+1, dftb1.qmat[i]);
+      //printf("\n");
+
+  } // end SCC cycle
+/*
+  printf("qmat - after SCC:\n");
+  for (i=0; i<(nn<=10?nn:10); i++)
+    printf(" %8.5f", dftb1.qmat[i]);
+  printf("\n");
+*/
+/*
+  printf("Shift   ");
+  for (i=0; i<3; i++) printf(" %8.5f", dftb1.shift[i]);
+  for (i=0; i<3; i++) printf(" %8.5f", dftb1.shift3[i]);
+  for (i=0; i<3; i++) printf(" %8.5f", dftb1.shift3a[i]);
+  for (i=0; i<3; i++) printf(" %8.5f", dftb1.shiftE[i]);
+  printf("\n");
+*/
+
+/*
+  printf("SHIFTE ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.shiftE[j]);
+  printf("\n");
+  printf("POT  ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot[j]);
+  printf("\n");
+  printf("POT2 ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot2[j]);
+  printf("\n");
+  printf("POT3 ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot3[j]);
+  printf("\n");
+  printf("POT4 ");
+  for (j=0; j<nn; j++) printf("%12.7f", dftb1.pot4[j]);
+  printf("\n");
+*/
+
+  printf("final eigenvalues:");
+  for (i=dftb1.nel/2-2; i<dftb1.nel/2; i++)
+    printf(" %8.5f (%5.3f)", dftb1.ev[i], dftb1.occ[i]);
+  printf(" FERMI");
+  for (i=dftb1.nel/2; i<dftb1.nel/2+2; i++)
+    printf(" %8.5f (%5.3f)", dftb1.ev[i], dftb1.occ[i]);
+  printf("\n");
+
+  /* debug */
+  printf ("niter = %d, eel = %12.6f, ecoul = %12.6f, ecoul3 = %12.6f, eext = %12.6f,\n", niter, eel, ecoul, ecoul3, eext);
+  //printf ("niter = %d, eel = %12.6f, ecoul = %12.6f, ecoul3 = %12.6f, eext = %12.6f, ", niter, eel, ecoul, ecoul3, eext);
+
+  clock_gettime(CLOCK_MONOTONIC, &time_sccstop);
+
+  // write out the eigenvalues
+
+/*
+  for (i=0; i<dftb1.ndim; i++)
+    printf("%2d %f\n", i+1, dftb1.ev[i]);
+*/
+  // outspec(nn, dftb1.ndim, dftb1.ind, dftb1.ev, dftb1.occ, efermi, dftb1.qmat, dftb1.qmulli, dftb, dftb1);
+  // outeigenvectors(dftb1.a, dftb1.ev, dftb1.ind, nn, dftb1);
+
+  // printf("%5d %5d / %2d %14.6f\n", 1, 1, niter, eel);
+  // printf("\n***** end of dftb *****\n");
+
+  /* CONTINUE HERE WITH REPULSION AND FORCES! */
+
+  // calculate atomic hamilton shift (= sum over gamma*charge)
+    for (i=0; i<nn; i++) {
+      dftb1.shift[i]   = - dftb1.shiftE[i];
+      dftb1.shift3[i]  = 0.0;
+      dftb1.shift3a[i] = 0.0;
+      for (j=0; j<nn; j++) {
+        // dftb1.shift[i] += - QM_CHARGE(j) * (i>j ? dftb1.gammamat[i][j] : dftb1.gammamat[j][i]);
+        dftb1.shift[i] += - QM_CHARGE(j) * dftb1.gammamat[i][j];
+        if (dftb->sccmode == 3) {
+	  dftb1.shift3[i]  +=   - QM_CHARGE(j)  * dftb1.gammader[i][j];
+	  dftb1.shift3a[i] += SQR(QM_CHARGE(j)) * dftb1.gammader[j][i];
+	}
+      }
+      if (dftb->sccmode == 3)
+	dftb1.shift3[i] *= - QM_CHARGE(i);
+    }
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.grad[i]);
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.partgrad[i]);
+  usual_gradient(dftb, x, dftb1.partgrad);
+/*
+  printf("gradient components from usual_gradient\n");
+  for (i=0; i<nn; i++)
+    printf("%5d%12.8f%12.8f%12.8f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  printf("end gradient components from usual_gradient\n");
+*/
+  for (i=0; i<nn; i++)
+    copy_dvec(dftb1.partgrad[i], dftb1.grad[i]);
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.partgrad[i]);
+  gamma_gradient(dftb, x, dftb1.partgrad);
+/*
+  printf("gradient components from gamma_gradient\n");
+  for (i=0; i<nn; i++)
+    printf("%5d%12.8f%12.8f%12.8f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  printf("end gradient components from gamma_gradient\n");
+*/
+  for (i=0; i<nn; i++)
+    dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+
+  /* externalchgrad */ /* DO IT HERE WITH PME SOMEHOW - BOTH QM AND MM! */
+
+  /*
+  printf("TEST\n");
+  printf("rlist = %f\n", fr->rlist);
+  printf("rcoulomb = %f\n", fr->rcoulomb);
+  printf("rcoulomb_switch = %f\n", fr->rcoulomb_switch);
+  printf("ewaldcoeff_q = %f\n", fr->ewaldcoeff_q);
+  printf("contrib at cutoff = %f\n", gmx_erfc(fr->ewaldcoeff_q * fr->rcoulomb) / fr->rcoulomb);
+  printf("contrib at 0.7 nm = %f\n", gmx_erfc(fr->ewaldcoeff_q * 0.7) / 0.7);
+  printf("contrib at 0.5 nm = %f\n", gmx_erfc(fr->ewaldcoeff_q * 0.5) / 0.5);
+  printf("END TEST\n");
+  */
+
+  for (j=0; j<nn; j++)
+    clear_dvec(dftb1.partgrad[j]);
+  for (j=0; j<ne; j++)
+    clear_dvec(dftb1.mmgrad[j]);
+
+  /* QM gradient */
+  // the coordinates and charges of QM atoms
+  for (j=0; j<nn; j++) {
+        dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+        dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+        dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+        dftb1.q_pme[j]    = (real) QM_CHARGE(j);
+  }
+  // the coordinates and the charges of MM atoms
+  for (j=0; j<ne; j++) {
+        dftb1.x_pme[nn + j][0] = (real) dftb1.xe[j][0] / NM_TO_BOHR;
+        dftb1.x_pme[nn + j][1] = (real) dftb1.xe[j][1] / NM_TO_BOHR;
+        dftb1.x_pme[nn + j][2] = (real) dftb1.xe[j][2] / NM_TO_BOHR;
+        dftb1.q_pme[nn + j]    = (real) dftb1.ze[j];
+  }
+  // PME -- long-range component
+  init_nrnb(dftb1.nrnb_pme);
+  clock_gettime(CLOCK_MONOTONIC, &time_1);
+  gmx_pme_do_dftb(fr->pmedata, 0, nn+ne, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, dftb1.q_pme, box, cr, 0, 0,
+      	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_forces, dftb1.pot);
+  clock_gettime(CLOCK_MONOTONIC, &time_2);
+  print_time_difference("DFTB F-1 TIME:", time_1, time_2);
+  for (j=0; j<nn; j++)
+    for (m=0; m<DIM; m++)
+      dftb1.partgrad[j][m] = - dftb1.f_pme[j][m] / HARTREE_BOHR2MD; // partgrad is gradient, i.e. the negative of force
+  /*
+  printf("gradient components to QM - LR\n");
+  for (i=0; i<nn; i++)
+    printf("%3d%12.7f%12.7f%12.7f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  */
+  //printf("PME corrections - checkpoint 1\n");
+  // PME corrections -- exclude QM--QM interaction
+  //printf("gradient QM/MM correction - QM--QM exclusions\n");
+  for (j=0; j<nn; j++) {
+    // exclude the QM--QM interactions -- gradient of contribution to potential dftb1.pot2[]
+    // note that the gradient of contribution to potential dftb1.pot3[] vanishes!
+    for (k=0; k<j; k++) {
+      dvec_sub(dftb1.x[j], dftb1.x[k], bond);
+      // negative of gradient -- we want to subtract it from partgrad
+      fscal = QM_CHARGE(j) * QM_CHARGE(k) / SQR(dnorm(bond)) *
+             (gmx_erf(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR) / dnorm(bond)
+              - M_2_SQRTPI * fr->ewaldcoeff_q / NM_TO_BOHR * exp(-SQR(fr->ewaldcoeff_q * dnorm(bond) / NM_TO_BOHR)));
+      dsvmul(fscal, bond, dgr); // vec(dgr) = fscal * vec(bond)
+      dvec_inc(dftb1.partgrad[j], dgr);
+      dvec_dec(dftb1.partgrad[k], dgr);
+      //printf("%1d-%1d:%12.7f%12.7f%12.7f\n", j+1, k+1, dgr[XX], dgr[YY], dgr[ZZ]);
+    }
+  }
+  //printf("PME corrections - checkpoint 2\n");
+
+  /* MM gradient -- LR component to QM/MM */
+  // the coordinates and charges of QM atoms
+  for (j=0; j<nn; j++) {
+        dftb1.x_pme[j][0] = (real) dftb1.x[j][0] / NM_TO_BOHR;
+        dftb1.x_pme[j][1] = (real) dftb1.x[j][1] / NM_TO_BOHR;
+        dftb1.x_pme[j][2] = (real) dftb1.x[j][2] / NM_TO_BOHR;
+        dftb1.q_pme[j]    = (real) QM_CHARGE(j);
+  }
+  // the coordinates and the charges of MM atoms
+  for (j=0; j<ne; j++) {
+        dftb1.x_pme[nn + j][0] = (real) dftb1.xe[j][0] / NM_TO_BOHR;
+        dftb1.x_pme[nn + j][1] = (real) dftb1.xe[j][1] / NM_TO_BOHR;
+        dftb1.x_pme[nn + j][2] = (real) dftb1.xe[j][2] / NM_TO_BOHR;
+        dftb1.q_pme[nn + j]    = 0.; // ASK GERRIT IF THIS IS REALLY NOT INCLUDED IN GROMACS MM CALCULATIONS!
+  }
+  /*
+  charge_checksum = 0.;
+  for (j=0; j<nn; j++)
+    charge_checksum += dftb1.q_pme[j];
+  printf("  PME charge checksum QM = %f\n", charge_checksum);
+  charge_checksum = 0.;
+  for (j=nn; j<nn+ne; j++)
+    charge_checksum += dftb1.q_pme[j];
+  printf("  PME charge checksum MM = %f\n", charge_checksum);
+  */
+  //PME
+  init_nrnb(dftb1.nrnb_pme);
+  clock_gettime(CLOCK_MONOTONIC, &time_1);
+  gmx_pme_do_dftb_mm_forces(fr->pmedata, 0, nn+ne, dftb1.x_pme, dftb1.f_pme, dftb1.q_pme, dftb1.q_pme, box, cr, 0, 0,
+      	  dftb1.nrnb_pme, vir_pme, fr->ewaldcoeff_q, energy_pme, flags_pme_forces);
+  clock_gettime(CLOCK_MONOTONIC, &time_2);
+  print_time_difference("DFTB F-2 TIME:", time_1, time_2);
+  /*
+  printf("MM FORCES EWALD START\n");
+  for (j=0; j<ne; j++)
+    printf("%5d %9.6f %9.6f %9.6f\n", j+1, dftb1.f_pme[nn + j][XX], dftb1.f_pme[nn + j][YY], dftb1.f_pme[nn + j][ZZ]);
+  printf("MM FORCES EWALD STOP\n");
+  */
+  for (j=0; j<ne; j++) {
+    dftb1.mmgrad[j][XX] = (double) (-dftb1.ze[j] / HARTREE_BOHR2MD * dftb1.f_pme[nn + j][XX]);
+    dftb1.mmgrad[j][YY] = (double) (-dftb1.ze[j] / HARTREE_BOHR2MD * dftb1.f_pme[nn + j][YY]);
+    dftb1.mmgrad[j][ZZ] = (double) (-dftb1.ze[j] / HARTREE_BOHR2MD * dftb1.f_pme[nn + j][ZZ]);
+  } // dsvmul(-dftb1.ze[j] / HARTREE_BOHR2MD, dftb1.f_pme[nn + j], dftb1.mmgrad[j]);
+  //printf("PME corrections - checkpoint 3\n");
+
+  //printf("gradient components to MM - LR\n");
+  //for (i=0; i<10; i++)
+  //  if (dnorm(dftb1.mmgrad[i]) > 0.00001)
+  //    printf("%3d%12.7f%12.7f%12.7f\n", i+1, dftb1.mmgrad[i][XX], dftb1.mmgrad[i][YY], dftb1.mmgrad[i][ZZ]);
+
+  for (j=0; j<nn; j++) { // do it for every QM atom
+    // add SR potential only from MM atoms in the neighbor list!
+    for (k=0; k<dftb1.neighbors_pme[j]; k++) {
+      l = dftb1.neighbor_pme[j][k];
+      pbc_dx_dftb(box, dftb1.x[j], dftb1.xe[l], bond);
+      dbondnorm = dnorm(bond);
+      if (dbondnorm < 0.001) { // this may occur on the first step of simulation for link atom(s)
+        printf("QM/MM PME QM--MM short range exploding for QM=%d, MM=%d. MM charge is %f\n", j+1, l+1, dftb1.ze[l]);
+      } else {
+        if (dbondnorm < fr->rcoulomb * NM_TO_BOHR) {
+          fscal = QM_CHARGE(j) * dftb1.ze[l] / SQR(dbondnorm) *
+                 (- (double) gmx_erfc(fr->ewaldcoeff_q * dbondnorm / NM_TO_BOHR) / dbondnorm
+                  - M_2_SQRTPI * (double) fr->ewaldcoeff_q / NM_TO_BOHR * exp(-SQR((double) fr->ewaldcoeff_q * dbondnorm / NM_TO_BOHR)));
+          dsvmul(fscal, bond, dgr);
+          //printf("SR: QM %1d -- MM %1d:%12.7f%12.7f%12.7f\n", j+1, k+1, dgr[XX], dgr[YY], dgr[ZZ]);
+          // short-range QM/MM contribution to QM gradient
+          dvec_inc(dftb1.partgrad[j], dgr);
+          // short-range QM/MM contribution to MM gradient
+          dvec_dec(dftb1.mmgrad[l], dgr);
+        }
+      }
+    }
+  }
+  //printf("\nPME corrections - checkpoint 4\n");
+
+  /*
+  printf("gradient components to MM - complete\n");
+  for (k=0; k<ne; k++)
+    if (dnorm(dftb1.mmgrad[k]) > 0.00001)
+      printf("%3d%12.7f%12.7f%12.7f\n", k+1, dftb1.mmgrad[k][XX], dftb1.mmgrad[k][YY], dftb1.mmgrad[k][ZZ]);
+  */
+  /*
+  printf("gradient components to QM - complete\n");
+  for (i=0; i<nn; i++)
+    printf("%3d%12.7f%12.7f%12.7f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  */
+
+  // end PME
+
+/*
+  printf("TEST - CUT-OFF QM/MM GRADIENTS ON MM ATOMS!\n");
+  for (k=0; k<dftb1.ne; k++) {
+    clear_dvec(dgr_cumul);
+    for (j=0; j<nn; j++) {
+      dvec_sub(dftb1.x[j], dftb1.xe[k], bond);
+      fscal = - (double) QM_CHARGE(j) * dftb1.ze[k] / CUB(dnorm(bond));
+      dsvmul(fscal, bond, dgr);
+      dvec_dec(dgr_cumul, dgr);
+    }
+    if (dnorm2(dgr_cumul) > 0.0000001)
+      printf("%3d%12.7f%12.7f%12.7f\n", k+1, dgr_cumul[XX], dgr_cumul[YY], dgr_cumul[ZZ]);
+  }
+  printf("TEST - CUT-OFF QM/MM GRADIENTS ON QM ATOMS!\n");
+  for (j=0; j<nn; j++) {
+    clear_dvec(dgr_cumul);
+    for (k=0; k<dftb1.ne; k++) {
+      dvec_sub(dftb1.x[j], dftb1.xe[k], bond);
+      fscal = - (double) QM_CHARGE(j) * dftb1.ze[k] / CUB(dnorm(bond));
+      dsvmul(fscal, bond, dgr);
+      dvec_inc(dgr_cumul, dgr);
+    }
+    if (dnorm2(dgr_cumul) > 0.0000001)
+      printf("%3d%12.7f%12.7f%12.7f\n", j+1, dgr_cumul[XX], dgr_cumul[YY], dgr_cumul[ZZ]);
+  }
+*/
+
+  // end PME
+  
+/*
+  // debug output
+  printf("gradient components from externalchgrad\n");
+  for (i=0; i<nn; i++)
+    printf("%3d%12.7f%12.7f%12.7f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  printf("end gradient components from externalchgrad\n");
+*/
+
+  for (i=0; i<nn; i++)
+    dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+
+  for (i=0; i<nn; i++)
+    clear_dvec(dftb1.partgrad[i]);
+  erep = repulsive(dftb, x, dftb1.partgrad);
+/*
+  printf("gradient components from repulsive\n");
+  for (i=0; i<nn; i++)
+    printf("%5d%12.8f%12.8f%12.8f\n", i+1, dftb1.partgrad[i][XX], dftb1.partgrad[i][YY], dftb1.partgrad[i][ZZ]);
+  printf("end gradient components from repulsive\n");
+*/
+  for (i=0; i<nn; i++)
+    dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+
+  // ADD THE DISPERSION INTERACTION (IF DESIRED)
+  if (dftb->dispersion) {
+    clock_gettime(CLOCK_MONOTONIC, &time_1);
+    for (i=0; i<nn; i++)
+      clear_dvec(dftb1.partgrad[i]);
+    switch (dftb->dispersion) {
+      case 1: /* Grimme's DFT-D3 */
+              edisp = dispersion_dftd3(dftb, x, dftb1.partgrad);
+              break;
+      case 2: /* Elstner's 2001, not yet implemented, cannot happen... */
+              edisp = 0.;
+              break;
+    }
+    for (i=0; i<nn; i++)
+      dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+    clock_gettime(CLOCK_MONOTONIC, &time_2);
+    print_time_difference("DFTB DIS TIME:", time_1, time_2);
+  } else {
+    edisp = 0.;
+  }
+
+  // ADD THE CORRECTION DUE TO CHARGE-DEPENDENT KLOPMAN--OHNO INTERACTION (IF DESIRED)
+  if (dftb->cdko) {
+    // clear the arrays for the gradients
+    for (i=0; i<nn; i++)
+      clear_dvec(dftb1.partgrad[i]);
+    for (i=0; i<ne; i++)
+      clear_dvec(dftb1.partmmgrad[i]);
+    //cdkograd(dftb, &QMMMlist, indexMM, x, xe, partgrad, partmmgrad);
+    cdkograd(dftb, box, dftb1.partgrad, dftb1.partmmgrad);
+    for (i=0; i<nn; i++)
+      dvec_inc(dftb1.grad[i], dftb1.partgrad[i]);
+    for (i=0; i<ne; i++)
+      dvec_inc(dftb1.mmgrad[i], dftb1.partmmgrad[i]);
+  }
+
+  printf ("erep = %16.10f      eel+erep = %16.10f     edisp = %16.10f\n", erep, eel+erep, edisp);
+
+  /* copy gradients to the corresponding arrays - copied from call_gaussian() */
+  //printf("begin forces\n");
+  for(i=0; i<nn; i++) {
+    /*
+    printf("GRAD %3d%12.7f%12.7f%12.7f\n", i+1, -dftb1.grad[i][XX], -dftb1.grad[i][YY], -dftb1.grad[i][ZZ]);
+    */
+    for(j=0; j<DIM; j++) {
+      f[i][j]      = (real) HARTREE_BOHR2MD * dftb1.grad[i][j];
+      fshift[i][j] = (real) HARTREE_BOHR2MD * dftb1.grad[i][j];
+    }
+  }
+  //printf("  end forces\n");
+  for(i=0; i<ne; i++) {
+    /*
+    if (SQR(dftb1.mmgrad[i][XX]) + SQR(dftb1.mmgrad[i][YY]) + SQR(dftb1.mmgrad[i][ZZ]) > 0.0001)
+      printf("MMGRAD %5d%12.7f%12.7f%12.7f\n", i+1, -dftb1.mmgrad[i][XX], -dftb1.mmgrad[i][YY], -dftb1.mmgrad[i][ZZ]);
+    */
+    for(j=0; j<DIM; j++) {
+      f[i + nn][j]      = (real) HARTREE_BOHR2MD * dftb1.mmgrad[i][j];      
+      fshift[i + nn][j] = (real) HARTREE_BOHR2MD * dftb1.mmgrad[i][j];
+    }
+  }
+
+  clock_gettime(CLOCK_MONOTONIC, &time_dftbstop);
+  print_time_difference("DFTB FRC TIME:", time_sccstop, time_dftbstop);
+  print_time_difference("DFTB     TIME:", time_dftbstart, time_dftbstop);
+
+  return HARTREE2KJ * AVOGADRO * (eel + erep + edisp);
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_fermi.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_fermi.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_fermi.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_fermi.c	2012-09-13 14:25:25.000000000 +0200
@@ -0,0 +1,94 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+// #include"charge_transfer.h"
+#include"qm_dftb.h"
+
+#define DEGTOL (1.e-4)
+#define KBOLTZ (3.1668114e-6) /* Boltzmann constant in Hartree / K */
+#define EFRANGE (0.05)
+#define EEPS (1.e-6)
+#define CEPS (1.e-8)
+
+void fermi(int ndim, double *ev, double *occ, double *efermi, int nelectrons, double telec)
+{
+  /* Fermi-Dirac occupation of ndim orbitals by nelectrons electrons.
+   *
+   * On return occ[i] = 2 / (1 + exp(beta * (ev[i] - *efermi))), where
+   * beta = 1 / (KBOLTZ * telec) and *efermi is the level, located by bisection,
+   * at which the occupations sum up to nelectrons.
+   *
+   *   ndim        number of orbitals, i.e. length of ev[] and occ[]
+   *   ev          orbital energies; the frontier orbitals are picked by index,
+   *               so ev[] is presumably sorted in ascending order (TODO confirm)
+   *   occ         output: occupation numbers
+   *   efermi      output: Fermi energy (not written if nelectrons == 0)
+   *   nelectrons  number of electrons; the program exits if it exceeds 2*ndim
+   *   telec       electronic temperature; must be non-zero, it divides in beta
+   *
+   * The search interval is +-EFRANGE around the mean of the two frontier
+   * orbital energies and at most maxiter bisection steps are taken; a warning
+   * is printed if the search has not converged by then.
+   */
+  const int maxiter = 200; /* 2*EFRANGE / 2^200 is far below the resolution of a double */
+  int i, iter, converged, nef1, nef2;
+  double sumoccup, beta, ef1, ef2;
+
+  beta = 1 / (KBOLTZ * telec);
+
+  if (nelectrons > 2*ndim) {
+    printf("Too many electrons: %d > %d\n", nelectrons, 2*ndim);
+    exit(-1);
+  }
+
+  // start defining occupation numbers and their derivatives
+  for (i=0; i<ndim; i++)
+    occ[i] = 0.0;
+
+  if (nelectrons == 0)
+    return;
+
+  // find energy range for Fermi energy
+  nef1 = (int) ((nelectrons-1)/2);
+  nef2 = (int) (nelectrons/2);
+  // for an even number of electrons, nef1 + 1 = nef2, and nef1==HOMO, nef2==LUMO
+  // if every orbital is doubly occupied (nelectrons == 2*ndim) there is no LUMO and
+  // nef2 == ndim would read one element past the end of ev[]; use the HOMO instead
+  if (nef2 > ndim - 1)
+    nef2 = ndim - 1;
+
+  *efermi = 0.5 * (ev[nef1]  + ev[nef2]);
+
+  /* for T > 0, bracket and iterate Fermi energy by bisection */
+  ef1 = *efermi - EFRANGE;
+  ef2 = *efermi + EFRANGE;
+  iter = 0;
+  do {
+    *efermi = 0.5 * (ef1 + ef2);
+    sumoccup = 0.;
+    for (i=0; i<ndim; i++) {
+      occ[i] = 2. / (1. + exp(beta * (ev[i] - *efermi)));
+      sumoccup += occ[i];
+    }
+    // too many electrons at the trial level -> lower the upper bound, else raise the lower one
+    if (sumoccup > (double) nelectrons)
+      ef2 = *efermi;
+    else
+      ef1 = *efermi;
+    // unchanged criterion: electron count within CEPS and bracket narrower than EEPS
+    converged = !(fabs(sumoccup - (double) nelectrons) > CEPS || fabs(ef1 - ef2) > EEPS);
+    iter++;
+  } while (!converged && iter < maxiter);
+
+  /* Without the iteration cap the loop never terminated when the requested
+   * electron count cannot be reached inside the bracket (Fermi level outside
+   * +-EFRANGE, or occupations that change more steeply than a double can
+   * resolve): the bracket then collapses while sumoccup stays off target.
+   * Report the problem and carry on with the last occupations instead. */
+  if (!converged)
+    printf("Warning: Fermi level search not converged after %d bisection steps: "
+           "sum of occupations = %.10f, expected %d, efermi = %.10f\n",
+           maxiter, sumoccup, nelectrons, *efermi);
+
+  return;
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_gamma.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_gamma.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_gamma.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_gamma.c	2013-10-14 15:53:33.000000000 +0200
@@ -0,0 +1,325 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+#include"qm_dftb.h"
+
+/*
+c===================================================================
+c     calculate gamma or gamma^h-function and Gamma-function for 
+c     one atom pairing (gamma^h_{ij} and Gamma_{ij}) 
+c     details see Gaus JCTC 2011.
+c     
+c     INPUT:
+c     real*8  r           distance between the two atoms i and j
+c     real*8  ui,uj       Hubbard parameters for atom i and j
+c     real*8  udi         Hubbard derivative dU/dq for atom i
+c     logical xhgammahlp  .true. if h=exp(-((ui+uj)/2^zeta*r^2)) 
+c     real*8  zeta        parameter for gamma^h
+c    
+c     OUTPUT:
+c     real*8  gval        value for gamma_{ij} or gamma^h_{ij}
+c     real*8  gder        value for Gamma_{ij} = dgamma_{ij}/dq_i
+c===================================================================
+*/
+
+void gammaall(double r, double ui, double uj, double udi, int xhgammahlp, double zeta, double *gval, double *gder)
+{ /* writes gamma (gamma^h if xhgammahlp is non-zero) to *gval and its derivative, scaled by udi, to *gder */
+	const double zero=1.e-4; /* threshold below which a+b or r is treated as zero */
+	double a,b,a2,a3,a4,a6,b2,b3,b4,b6,
+	       s,ar,g,fab,fba,dgda,dsdu,dfabda,dfbada,
+	       h,dhdu;
+
+	a = 3.2 * ui; /* the Hubbard parameters scaled by 3.2 serve as the exponents below */
+	b = 3.2 * uj;
+
+	if (a+b < zero) { /* vanishing Hubbard parameters: both outputs are zero */
+		*gval = 0.;
+		*gder = 0.;
+	} else {
+		if (r < zero) { /* atoms (almost) on top of each other, e.g. the i == j entry */
+			*gval = 0.15625 * (a+b); /* equals 0.5 * (ui+uj), because 0.15625 * 3.2 = 0.5 */
+			*gder = 0.5; /* multiplied by udi further down */
+		} else {
+			if (fabs(a-b) < 1.0e-5) { /* (nearly) equal exponents: form without 1/(a2-b2) */
+				ar   = (a+b)/2. * r;
+				g    = (48. + 33.*ar + 9.*ar*ar + ar*ar*ar) / (r*48.);
+				s    = exp(-ar) * g; /* short-range part that is subtracted from 1/r below */
+				dgda = (33. + 18.*ar + 3.*ar*ar) / 48.;
+				dsdu = 3.2 * exp(-ar) * (dgda - r*g); /* derivative of s; 3.2 converts d/da into d/dU */
+			} else { /* distinct exponents: general two-exponent form */
+				a2   = a*a;
+				a3   = a2*a;
+				a4   = a2*a2;
+				a6   = a4*a2;
+				b2   = b*b;
+				b3   = b2*b;
+				b4   = b2*b2;
+				b6   = b4*b2;
+				fab  = a*b4 / (2.*SQR(a2-b2)) - (b6-3.*a2*b4) / (CUB(a2-b2)*r);
+				fba  = b*a4 / (2.*SQR(b2-a2)) - (a6-3.*b2*a4) / (CUB(b2-a2)*r);
+				s    = exp(-a*r) * fab + exp(-b*r) * fba;
+				dfabda = - (b6 + 3.*a2*b4) / (2. * CUB(a2-b2)) - 12.*a3*b4 / (r*QRT(a2-b2));
+				dfbada = 2.*b3*a3 / (CUB(b2-a2)) + 12.*a3*b4 / (r*QRT(b2-a2));
+				dsdu = 3.2 * (exp(-a*r) * (dfabda-r*fab) + exp(-b*r)*dfbada);
+			}
+			/* gamma^h: damp s with h = exp(-((a+b)*0.15625)^zeta * r^2) */
+			if (xhgammahlp) {
+				h    = exp(-pow(((a+b)*0.15625), zeta) *r*r);
+				dhdu = -h*zeta*r*r * pow(((a+b)*0.15625), zeta-1.) * 0.5;
+				*gval= 1.0/r - s*h;
+				*gder= -(dsdu*h + s*dhdu); /* product rule applied to s*h */
+			} else {
+				*gval= 1./r - s;
+				*gder = -dsdu;
+			}
+		}
+		*gder *= udi; /* applied to both the r < zero value and the general one */
+	}
+	return;
+}
+
+/*
+c===================================================================
+c     calculate dgamma/dr or dgamma^h/dr and dGamma/dr for
+c     one atom pairing (gamma^h_{ij} and Gamma_{ij})
+c     details see Gaus JCTC 2011.
+c     r=|R_j-R_i|
+c
+c     INPUT:
+c     real*8  r           distance between the two atoms i and j
+c     real*8  ui,uj       Hubbard parameters for atom i and j
+c     real*8  udi         Hubbard derivative dU/dq for atom i
+c     logical xhgammahlp  .true. if h=exp(-((ui+uj)/2^zeta*r^2))
+c     real*8  zeta        parameter for gamma^h
+c
+c     OUTPUT:
+c     real*8  dcdr        dgamma_{ij}/dr 
+c     real*8  dcdr3       dGamma_{ij}/dr = d^2gamma_{ij}/drdq_i
+c===================================================================
+*/
+
+void gammaall1(double r, double ui, double uj, double udi, int xhgammahlp, double zeta, double *dcdr, double *dcdr3)
+{ /* writes the r-derivative of gamma (gamma^h if xhgammahlp) to *dcdr and that of its udi-scaled derivative to *dcdr3 */
+	const double zero=1.e-4; /* threshold below which a+b or r is treated as zero */
+	double r2,a,b,a2,a3,a4,b2,b3,b4,z,z2,zr,g,dgdr,dsdr,fab,fba,dfabdr,dfbadr,
+	       dcdudr,dsdudr,dgdadr,dgda,dfabdadr,dfbadadr,dfabda,dfbada,h,dhdu,dhdr,dhdudr,s,dsdu;
+
+	r2 = r*r;
+	a  = 3.2 * ui; /* the Hubbard parameters scaled by 3.2 serve as the exponents below */
+	b  = 3.2 * uj;
+
+	if ((a+b < zero) || (r < zero)) { /* vanishing parameters or coinciding atoms: both derivatives are zero */
+	       *dcdr = 0.;
+	       *dcdr3= 0.;
+	       r    = 99999999.9; /* WHY ??? - no effect: r is a by-value parameter and is not read again */
+	} else { /* here: 1/r-s */
+		if (fabs(a-b) < 1.e-5) { /* (nearly) equal exponents: form without 1/(a2-b2) */
+			z    = 0.5 * (a+b);
+			z2   = z*z;
+			zr   = z*r;
+			g    = (48. + 33.*zr + 9.*zr*zr + zr*zr*zr) / (48.*r);
+			dgdr = -1./r2 + 3.*z2/16. + z2*zr/24.;
+			dsdr = exp(-zr) * (dgdr - z*g);
+			dgda   = (33. + 18.*zr + 3.0*zr*zr) / 48.;
+			dgdadr = 0.375*z + 0.125*z2*r;
+			dsdudr = 3.2 * exp(-zr) * (g*(zr-1.) - z*dgda + dgdadr - r*dgdr);
+			if (xhgammahlp) { /* s and dsdu are only needed, and only assigned, for gamma^h */
+				s    = exp(-zr)*g;
+				dsdu = 3.2 * exp(-zr) * (dgda-r*g);
+			}
+		} else { /* distinct exponents: general two-exponent form */
+			a2  = a*a;
+			a3  = a2*a;
+			a4  = a2*a2;
+			b2  = b*b;
+			b3  = b2*b;
+			b4  = b2*b2;
+			fab = a*b4 / (2.*SQR(a2-b2)) - (b4*b2-3.*a2*b4) / (CUB(a2-b2)*r);
+			fba = b*a4 / (2.*SQR(b2-a2)) - (a4*a2-3.*b2*a4) / (CUB(b2-a2)*r);
+			dfabdr    = (b4*b2 - 3.*a2*b4) / (CUB(a2-b2)*r2);
+			dfbadr    = (a4*a2 - 3.*b2*a4) / (CUB(b2-a2)*r2);
+			dsdr      = exp(-a*r) * (dfabdr-a*fab) + exp(-b*r) * (dfbadr-b*fba);
+			dfabda    = - (b2*b4 + 3.*a2*b4) / (2.*CUB(a2-b2)) -12.*a3*b4 / (r*QRT(a2-b2));
+			dfbada    = 2.*b3*a3 / CUB(b2-a2) +12.*a3*b4 / (r*QRT(b2-a2));
+			dfabdadr   = 12.*a3*b4 / (r2*QRT(a2-b2));
+			dfbadadr  = -12.*a3*b4 / (r2*QRT(b2-a2));
+			dsdudr    = 3.2 * (exp(-a*r) * (fab*(a*r-1.) - a*dfabda + dfabdadr - r*dfabdr) + exp(-b*r) * (dfbadadr-b*dfbada));
+			if (xhgammahlp) { /* s and dsdu are only needed, and only assigned, for gamma^h */
+				s   = exp(-a*r) * fab + exp(-b*r) * fba;
+				dsdu= 3.2 * (exp(-a*r) * (dfabda-r*fab) + exp(-b*r) * dfbada);
+			}
+		}
+		/* gamma^h: s is damped with h = exp(-((a+b)*0.15625)^zeta * r^2), so the product rule is needed */
+		if (xhgammahlp) {
+			h      = exp(- pow((a+b)*0.15625, zeta)*r*r);
+			dhdu   = -h*zeta*r*r * pow((a+b)*0.15625, zeta-1.) * 0.5;
+			dhdr   = -h*2.*r * pow((a+b)*0.15625, zeta);
+			dhdudr = h*zeta*r * pow((a+b)*0.15625, zeta-1.) * (r*r * pow((a+b)*0.15625, zeta) - 1.);
+			*dcdr   = - 1./r2 - (dsdr*h + s*dhdr);
+			dcdudr = - (dsdudr*h + dsdu*dhdr + dsdr*dhdu + s*dhdudr);
+			*dcdr3  = dcdudr * udi; /* scaled by udi, as gammaall() does with *gder */
+		} else {
+			*dcdr   = -1./r2 - dsdr;
+			dcdudr = -dsdudr;
+			*dcdr3  = dcdudr * udi; /* scaled by udi, as gammaall() does with *gder */
+		}
+	} /* end 1/r-s */
+	return;
+}
+
+/*
+c=======================================================================
+c get the gamma and Gamma contribution to the gradient
+c -F_{kx}= 0.5d0*\Delta q_k\sum_{a!=k}\Delta q_a(dgamma_{ak}/dR_{kx}+
+c          dgamma_{ka}/dR_{kx})+1/3 \Delta q_k\sum_{a!=k}\Delta q_a (
+c          \Delta q_a dGamma_{ak}/dR_{kx}+\Delta q_k dGamma_{ak}/dR_{kx}
+c          )
+c
+c INPUT:
+c integer  nn            number of atoms (in one cell)
+c real*8   x(3,NNDIM)    coordinates
+c integer  izp(NNDIM)    map from atoms to atom types
+c real*8   uhubb(MAXTYP) Hubbard parameters
+c real*8   uhder(MAXTYP) Hubbard derivatives
+c logical  izpxh(NNDIM)  non-zero for atoms which need the extra term
+c                         in gamma^h (the code indexes it by atom, not by type)
+c real*8   zeta          parameter for gamma^h (see Gaus JCTC 2011)
+c real*8   qdiff(NNDIM)  atomic net charges (Mulliken)
+c integer  sccmode       last term of the DFT Taylor series which
+c                        is included (e.g. 2 = 2nd order, 3 = 3rd order)
+c
+c OUTPUT:
+c real*8   hgrad(3,NNDIM) gradient contribution 
+c
+c======================================================================
+*/
+
+void gammagrad(int nn, dvec *x, int *izp, double *uhubb, double *uhder,
+		int *izpxh, double zeta, double *qdiff, int sccmode, double **hgrad)
+{
+	/* Gamma contribution to the gradient (plus the Gamma-derivative term if sccmode == 3).
+	 * hgrad[atom] is overwritten for every atom; nothing is accumulated into it. */
+	int dim, atom, other, use_h;
+	dvec acc2, acc3, sep;
+	double dist, w2, w3;
+	double dg_ao, dG_ao;	/* gammaall1 output for the (atom, other) ordering */
+	double dg_oa, dG_oa;	/* gammaall1 output for the (other, atom) ordering */
+	for (atom = 0; atom < nn; atom++)
+		clear_dvec(hgrad[atom]);
+	for (atom = 0; atom < nn; atom++) {
+		clear_dvec(acc2);
+		clear_dvec(acc3);
+		for (other = 0; other < nn; other++) {
+			if (other == atom)
+				continue;
+			dvec_sub(x[atom], x[other], sep);
+			dist  = dnorm(sep);
+			use_h = izpxh[atom] || izpxh[other];
+			gammaall1(dist, uhubb[izp[atom]], uhubb[izp[other]], uhder[izp[atom]], use_h, zeta, &dg_ao, &dG_ao);
+			gammaall1(dist, uhubb[izp[other]], uhubb[izp[atom]], uhder[izp[other]], use_h, zeta, &dg_oa, &dG_oa);
+			/* factors shared by all three Cartesian components of this pair */
+			w2 = qdiff[other] * (dg_oa + dg_ao) / dist;
+			w3 = qdiff[other] * (qdiff[other]*dG_oa + qdiff[atom]*dG_ao) / dist;
+			for (dim = 0; dim < 3; dim++) {
+				acc2[dim] += w2 * sep[dim];
+				acc3[dim] += w3 * sep[dim];
+			}
+		}
+		for (dim = 0; dim < 3; dim++) {
+			if (sccmode == 3)
+				hgrad[atom][dim] = qdiff[atom] * (0.5*acc2[dim] + acc3[dim]/3.);
+			else
+				hgrad[atom][dim] = qdiff[atom] * 0.5 * acc2[dim];
+		}
+	}
+}
+/*
+c======================================================================
+c   Build symmetric matrix gammamat
+c   Build non-symmetric matrix gammader
+c
+c   INPUT:
+c   integer nn            number of atoms
+c   real*8  x(3,NNDIM)    position of atoms
+c   integer izp(NNDIM)    map from atoms to atom types
+c   real*8  uhubb(MAXTYP) Hubbard parameters
+c   real*8  uhder(MAXTYP) Hubbard derivatives dU/dq
+c   real*8  zeta          parameter for gamma^h (see Gaus JCTC 2011)
+c   logical izpxh(MAXTYP) .true. for atom types which need extra term
+c                         in gamma^h if switched on
+c
+c   OUTPUT:
+c   real*8 gammamat(*,*) matrix containing gamma/gamma^h for all atom-pairings
+c   real*8 gammader(*,*) matrix containing Gamma=dgamma/dq for DFTB3 
+c                        for all atom-pairings
+c
+c   Note that code is made efficient (but still easily readable) 
+c   for DFTB3, but allows also running DFTB2, therefore gammader 
+c   is calculated by default in this function but of course may 
+c   be zeroed or controlled by a subroutine calling get_gammamat 
+c   or using the OUTPUT.
+c
+c======================================================================
+*/
+
+void get_gammamat(int nn, dvec *x, int *izp, double *uhubb, double *uhder, double zeta,
+		int *izpxh, double **gammamat, double **gammader)
+{	/* gammaall() for every ordered atom pair, diagonal included: gval -> gammamat, gder -> gammader */
+	int row, col, use_h;
+	dvec sep;
+	for (row = 0; row < nn; row++) {
+		for (col = 0; col < nn; col++) {
+			use_h = izpxh[row] || izpxh[col]; /* handed to gammaall() as xhgammahlp */
+			dvec_sub(x[row], x[col], sep);
+			gammaall(dnorm(sep), uhubb[izp[row]], uhubb[izp[col]], uhder[izp[row]], use_h, zeta, &gammamat[row][col], &gammader[row][col]);
+		}
+	}
+}
+
+double gams(double r, double ua, double ub)
+{	/* short-range function built from the exponents 3.2*ua and 3.2*ub; r == 0 is not guarded */
+	const double ea = 3.2 * ua;
+	const double eb = 3.2 * ub;
+	double ea2, ea4, ea6, eb2, eb4, eb6, fab, fba, x;
+
+	if (!(fabs(ea-eb) < 1.e-5)) { /* distinct exponents: general two-exponent form */
+		ea2 = ea*ea;
+		ea4 = ea2*ea2;
+		ea6 = ea4*ea2;
+		eb2 = eb*eb;
+		eb4 = eb2*eb2;
+		eb6 = eb4*eb2;
+		fab = ea*eb4 / (2.*SQR(ea2-eb2)) - (eb6 - 3.*ea2*eb4) / (CUB(ea2-eb2)*r);
+		fba = eb*ea4 / (2.*SQR(eb2-ea2)) - (ea6 - 3.*eb2*ea4) / (CUB(eb2-ea2)*r);
+		return exp(-ea*r) * fab + exp(-eb*r) * fba;
+	}
+
+	x = (ea+eb)/2. * r; /* (nearly) equal exponents: form without 1/(ea2-eb2) */
+	return exp(-x) * (48. + 33.*x + 9.*x*x + x*x*x) / (r*48.);
+}
+
+double gams1(double r, double ua, double ub)
+{	/* r-derivative of the short-range function evaluated by gams(); r == 0 is not guarded */
+	const double ea = 3.2 * ua;
+	const double eb = 3.2 * ub;
+	double ea2, ea4, eb2, eb4, fab, fba, dfab, dfba;
+	double mean, mean2, x, poly, dpoly;
+
+	if (!(fabs(ea-eb) < 1.e-5)) { /* distinct exponents: general two-exponent form */
+		ea2 = ea*ea;
+		ea4 = ea2*ea2;
+		eb2 = eb*eb;
+		eb4 = eb2*eb2;
+		fab  = ea*eb4/(2.*SQR(ea2-eb2)) - (eb4*eb2 - 3.*ea2*eb4) / (CUB(ea2-eb2)*r);
+		fba  = eb*ea4/(2.*SQR(eb2-ea2)) - (ea4*ea2 - 3.*eb2*ea4) / (CUB(eb2-ea2)*r);
+		dfab = (eb4*eb2 - 3.*ea2*eb4) / (CUB(ea2-eb2)*r*r);
+		dfba = (ea4*ea2 - 3.*eb2*ea4) / (CUB(eb2-ea2)*r*r);
+		return exp(-ea*r) * (dfab-ea*fab) + exp(-eb*r) * (dfba-eb*fba);
+	}
+	mean  = (ea+eb)/2.; /* (nearly) equal exponents: form without 1/(ea2-eb2) */
+	mean2 = mean*mean;
+	x     = mean*r;
+	poly  = (48. + 33.*x + 9.*x*x + x*x*x) / (48.*r);
+	dpoly = -1./(r*r) + 3.*mean2/16. + mean2*x/24.;
+	return exp(-x) * (dpoly-mean*poly);
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_gammamat.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_gammamat.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_gammamat.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_gammamat.c	2012-09-13 14:25:22.000000000 +0200
@@ -0,0 +1,120 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+// #include"charge_transfer.h"
+#include"qm_dftb.h"
+
+inline double gamsub(double a, double b, double r, double rrc)
+{ /* exp(-a*r) * (a*b^4 / (2*(a^2-b^2)^2) - (b^6 - 3*a^2*b^4) * rrc / (a^2-b^2)^3); gam12() passes rrc = 1/r */
+  const double inv_diff = 1.0 / (a*a - b*b);
+  const double over_r   = (b*b*b*b*b*b - 3*a*a*b*b*b*b) * inv_diff*inv_diff*inv_diff * rrc;
+  const double constant = 0.5 * a * b*b*b*b * inv_diff*inv_diff;
+
+  return exp(-a*r) * (constant - over_r);
+}
+
+inline double gamsubder(double a, double b, double r, double rrc)
+{ /* r-derivative of gamsub(a, b, r, rrc) for rrc = 1/r, which is what gam121() passes */
+  const double inv_diff = 1.0 / (a*a - b*b);
+  const double over_r   = (b*b*b*b*b*b - 3*a*a*b*b*b*b) * inv_diff*inv_diff*inv_diff * rrc;
+  const double constant = 0.5 * a * b*b*b*b * inv_diff*inv_diff;
+
+  return exp(-a*r) * (-a * (constant - over_r) + over_r * rrc);
+}
+
+void gammamatrix(int nat, double (*rat)[3], double **gammamat, double uhubb[DFTB_MAXTYPES], int *izp)
+{
+  /* Fill the lower triangle (diagonal included) of gammamat with gam12()
+   * evaluated at the distance between atoms "row" and "col"; entries above
+   * the diagonal are not written. */
+  int row, col;
+  double dist;
+  dvec sep;
+
+  for (row = 0; row < nat; row++) {
+    for (col = 0; col <= row; col++) {
+      dvec_sub(rat[row], rat[col], sep);
+      dist = dnorm(sep);
+      gammamat[row][col] = gam12(dist, uhubb[izp[row]], uhubb[izp[col]]);
+    }
+  }
+}
+
+double gam12(double r, double uhub1, double uhub2)
+{
+  /* gamma value for one atom pair at distance r with Hubbard parameters
+   * uhub1 and uhub2.  Returns 0 if the scaled parameters sum to less than
+   * 1.e-4, and the on-site value 0.3125*avg if r is below 1.e-4. */
+  const double tiny = 1.e-4;
+  const double t1 = 3.2*uhub1;
+  const double t2 = 3.2*uhub2;
+  double inv_sum, ratio, avg, inv_r, x, damp;
+
+  if (t1+t2 < tiny)
+    return 0.0;
+
+  inv_sum = 1.0 / (t1+t2);
+  ratio   = t1*t2*inv_sum;
+  avg     = 1.6 * (ratio + ratio*ratio*inv_sum);
+  if (r < tiny)
+    return 0.3125*avg;
+
+  inv_r = 1.0/r;
+  if (!(fabs(t1-t2) < 1.e-5)) /* distinct exponents */
+    return inv_r - gamsub(t1, t2, r, inv_r) - gamsub(t2, t1, r, inv_r);
+
+  x    = avg * r; /* (nearly) equal exponents */
+  damp = exp(-x)/48.0;
+  return (1.0 - (48.0+33*x+x*x*(9.0+x))*damp) * inv_r;
+}
+
+void gammamatrix1(int nat, double (*rat)[3], int *izp, double uhubb[DFTB_MAXTYPES], double (**gamma_deriv)[3])
+{
+  /* For every pair row > col store in gamma_deriv[row][col] the vector
+   * gam121(|d|) * d with d = rat[col] - rat[row]; the diagonal and the
+   * upper triangle are not written. */
+  int row, col, dim;
+  double d[3], scale, dist;
+
+  for (row = 0; row < nat; row++) {
+    for (col = 0; col < row; col++) {
+      for (dim = 0; dim < 3; dim++)
+        d[dim] = rat[col][dim] -rat[row][dim];
+      dist = sqrt(d[0]*d[0]+d[1]*d[1]+d[2]*d[2]);
+      // get value for the gradient
+      scale = gam121(dist, uhubb[izp[row]], uhubb[izp[col]]);
+      for (dim = 0; dim < 3; dim++)
+        gamma_deriv[row][col][dim] = scale * d[dim];
+    }
+  }
+}
+
+double gam121(double r, double uhub1, double uhub2)
+{
+  /* (1/r) * d(gamma)/dr for one atom pair; gammamatrix1() multiplies it by the
+   * separation vector.  Returns 0 if the scaled Hubbard parameters sum to
+   * less than 1.e-4 or if r is below 1.e-4. */
+  const double tiny = 1.e-4;
+  const double t1 = 3.2*uhub1;
+  const double t2 = 3.2*uhub2;
+  double inv_sum, ratio, avg, inv_r, inv_r3, x, damp;
+
+  if (t1+t2 < tiny)
+    return 0.0;
+
+  inv_sum = 1.0 / (t1+t2);
+  ratio   = t1*t2*inv_sum;
+  avg     = 1.6 * (ratio + ratio*ratio*inv_sum);
+  if (r < tiny)
+    return 0.0;
+
+  inv_r  = 1.0/r;
+  inv_r3 = inv_r * inv_r * inv_r;
+  if (!(fabs(t1-t2) < 1.e-5)) /* distinct exponents */
+    return - inv_r3 - (gamsubder(t1, t2, r, inv_r) + gamsubder(t2, t1, r, inv_r)) * inv_r;
+
+  x    = avg * r; /* (nearly) equal exponents */
+  damp = exp(-x)/48.0;
+  return - (1.0 - (48.0 + 48.0 * x + SQR(x) * (24.0 + 7.0 * x + SQR(x))) * damp) * inv_r3;
+}
+
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_gradient.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_gradient.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_gradient.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_gradient.c	2013-11-01 13:39:09.000000000 +0100
@@ -0,0 +1,215 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+#include"qm_dftb.h"
+
+// blas routine(s)
+static void dsyr(char uplo, long n, double alpha, double *x, long incx, double *a, long lda)
+{ // thin wrapper for BLAS DSYR (A := alpha*x*x**T + A); ALPHA is DOUBLE PRECISION there, so it must not be passed as long
+  extern void dsyr_(char *, long *, double *, double *, long *, double *, long *);
+  dsyr_(&uplo, &n, &alpha, x, &incx, a, &lda); // the Fortran routine takes every argument by reference
+  return; // NOTE(review): n, incx and lda travel as long; reference BLAS expects default INTEGER - confirm against the linked library
+}
+
+void usual_gradient(dftb_t *dftb, dvec *x, dvec *grad)
+{ /* adds to grad[j] the gradient terms obtained from finite differences of the matrices returned by slkmatrices() */
+  int i, j, k, izpj, izpk;
+  int m, n, indj, indk, lj, mj, nu, lk, mk, mu;
+  double ocmcc, xhelp, dgrh, dgrs, dgr, dgr3;
+
+  const double deltax = 0.01; /* displacement used for the finite differences */
+  dftb_phase1_t dftb1 = dftb->phase1; /* by-value copy of the phase1 structure */
+
+  dgr = dgr3 = 0.;
+
+  /* build the two weighted coefficient products in the single matrix b */
+  for (m=0; m<dftb1.norb; m++)
+    for (n=0; n<dftb1.norb; n++)
+      dftb1.b[m][n] = 0.e0;
+
+  for (i=0; i<dftb1.norb; i++) {
+    if (dftb1.occ[i] < dftb->dacc) /* stops at the first occupation below dacc */
+      break;
+    for (m=0; m<dftb1.norb; m++)
+      for (n=0; n<m; n++) {
+        ocmcc = dftb1.occ[i] * dftb1.a[m][i] * dftb1.a[n][i];
+        dftb1.b[n][m] += ocmcc * dftb1.ev[i]; /* upper triangle: weighted with the eigenvalue */
+        dftb1.b[m][n] += ocmcc;               /* lower triangle: occupation-weighted product */
+      }
+  }
+  for (m=0; m<dftb1.norb; m++)
+    for (n=0; n<m; n++)
+      if (fabs(dftb1.b[m][n]) < dftb->dacc) /* only lower-triangle entries are truncated to zero */
+        dftb1.b[m][n] = 0.e0;
+
+  /* the matrix b is correct here! */
+
+  for (j=0; j<dftb->atoms; j++) { /* for every atom that forces act upon */
+    indj = dftb1.ind[j];
+    izpj = dftb1.izp[j];
+    for (k=0; k<dftb->atoms; k++) if (k != j) { /* for every atom acting on the studied atom j */
+      indk = dftb1.ind[k];
+      izpk = dftb1.izp[k];
+      /* NOTE(review): x[j][i] is displaced below, but dftb1.x is read and passed on - this requires x to alias dftb1.x; confirm */
+      for (i=0; i<3; i++) { /* XX, YY and ZZ */
+        /* derivative of the slko matrices */
+        xhelp = dftb1.x[j][i];
+	x[j][i] = xhelp + deltax;
+	slkmatrices(k, j, dftb1.x, dftb1.au,  dftb1.bu,  dftb->lmax, dftb->dim1, dftb->dr1, dftb1.izp, dftb->skstab1, dftb->skhtab1, dftb->skself1);
+	x[j][i] = xhelp - deltax;
+	slkmatrices(k, j, dftb1.x, dftb1.auh, dftb1.buh, dftb->lmax, dftb->dim1, dftb->dr1, dftb1.izp, dftb->skstab1, dftb->skhtab1, dftb->skself1);
+	x[j][i] = xhelp; /* restore the undisplaced coordinate */
+	/* use summation over angular momentum and magnetic quantum numbers
+	 * because shift is actually defined for the orbitals */
+	for (lj=1; lj<=dftb->lmax[izpj]; lj++)
+	  for (mj=1; mj<2*lj; mj++) {
+	    n  = SQR(lj-1) + mj - 1;
+	    nu = n + indj;
+	    for (lk=1; lk<=dftb->lmax[izpk]; lk++)
+	      for (mk=1; mk<2*lk; mk++) {
+	        m  = SQR(lk-1) + mk - 1;
+		mu = m + indk;
+		/* dgrh = 2 * ( d H_{k,j}^{m,n}/ d R_{j} ); the factor 2 arises because the +-deltax difference is divided by deltax only */
+		dgrh = (dftb1.au[m][n] - dftb1.auh[m][n]) / deltax;
+		/* dgrs = - 2 * ( d S_{k,j}^{m,n}/ d R_{j} ) */
+		dgrs = -(dftb1.bu[m][n] - dftb1.buh[m][n]) / deltax;
+		/* dgr = ( d S_{k,j}^{m,n}/ d R_{j} ) * (shift(k)+shift(j))
+		 * dgr3 =  ( d S_{k,j}^{m,n}/ d R_{j} ) * (2*shift3(k)+shift3A(k)+2*shift3(j)+shift3A(j))/3.0 */
+		dgr = -0.5 * dgrs * (dftb1.shift[k] + dftb1.shift[j]);
+                /* NOTE THAT WITH CDKO, shiftE2 would have to come in here as well !!! */
+		if (dftb->sccmode == 3)
+		  dgr3 = -0.5 * dgrs * (2.*dftb1.shift3[k] + dftb1.shift3a[k] + 2.*dftb1.shift3[j] + dftb1.shift3a[j])/3.;
+		/* only lower triangle contains sum_i n(i) c_{mu,i} c_{nu,i}
+		 * only upper triangle contains sum_i epsilon_i n(i) c_{mu,i} c_{nu,i} */
+		if (mu > nu) {
+		  dgrh *= dftb1.b[mu][nu];
+		  dgrs *= dftb1.b[nu][mu];
+		  dgr  *= dftb1.b[mu][nu];
+		  if (dftb->sccmode == 3) dgr3 *= dftb1.b[mu][nu];
+		} else {
+		  dgrh *= dftb1.b[nu][mu];
+		  dgrs *= dftb1.b[mu][nu];
+		  dgr  *= dftb1.b[nu][mu];
+		  if (dftb->sccmode == 3) dgr3 *= dftb1.b[nu][mu];
+		}
+		grad[j][i] += dgrh + dgrs + dgr; /* accumulated: grad is not cleared in this function */
+		if (dftb->sccmode == 3) grad[j][i] += dgr3;
+	      }
+	  }
+      }
+
+    }
+  }
+  return;  
+}
+
+void gamma_gradient_old(dftb_t *dftb, dvec *x, dvec *grad)
+{
+  /* Adds dq(atom) * sum_{other != atom} dq(other) * G to grad[atom], with dq = qmat - qzero1
+   * of the atom type and G from gammamatrix1(); grad is not cleared and x is unused. */
+  dftb_phase1_t dftb1 = dftb->phase1;
+  int other, atom;
+  double dq;
+  dvec sum, signedg, term;
+  gammamatrix1(dftb->atoms, dftb1.x, dftb1.izp, dftb->uhubb1, dftb1.gamma_deriv);
+
+  for (atom = 0; atom < dftb->atoms; atom++) {
+    clear_dvec(sum);
+    for (other = 0; other < dftb->atoms; other++) {
+      if (other == atom)
+        continue;
+      if (other < atom) /* only the lower triangle is stored: take the mirrored entry with flipped sign */
+        dsvmul(-1.0, dftb1.gamma_deriv[atom][other], signedg);
+      else
+        dsvmul( 1.0, dftb1.gamma_deriv[other][atom], signedg);
+      dq = dftb1.qmat[other] - dftb->qzero1[dftb1.izp[other]];
+      dsvmul(dq, signedg, term);
+      dvec_inc(sum, term);            /* sum += dq(other) * signedg */
+    }
+    dq = dftb1.qmat[atom] - dftb->qzero1[dftb1.izp[atom]];
+    dsvmul(dq, sum, term);
+    dvec_inc(grad[atom], term);       /* grad[atom] += dq(atom) * sum */
+  }
+}
+
+/*
+c=======================================================================
+c get the gamma and Gamma contribution to the gradient
+c -F_{kx}= 0.5d0*\Delta q_k\sum_{a!=k}\Delta q_a(dgamma_{ak}/dR_{kx}+
+c          dgamma_{ka}/dR_{kx})+1/3 \Delta q_k\sum_{a!=k}\Delta q_a (
+c          \Delta q_a dGamma_{ak}/dR_{kx}+\Delta q_k dGamma_{ak}/dR_{kx}
+c          )
+c
+c INPUT (x is an argument; everything else is read from the dftb structure):
+c integer  nn            number of atoms (in one cell)
+c real*8   x(3,NNDIM)    coordinates
+c integer  izp(NNDIM)    map from atoms to atom types
+c real*8   uhubb(MAXTYP) Hubbard parameters
+c real*8   uhder(MAXTYP) Hubbard derivatives
+c logical  izpxh(NNDIM)  non-zero for atoms which need the extra term
+c                         in gamma^h (the code indexes it by atom, not by type)
+c real*8   zeta          parameter for gamma^h (see Gaus JCTC 2011)
+c real*8   qdiff(NNDIM)  atomic net charges (Mulliken), formed as qmat - qzero(izp)
+c integer  sccmode       last term of the DFT Taylor series which
+c                        is included (e.g. 2 = 2nd order, 3 = 3rd order)
+c
+c OUTPUT:
+c real*8   grad(3,NNDIM) gradient contribution 
+c
+c======================================================================
+*/
+
+void gamma_gradient(dftb_t *dftb, dvec *x, dvec *grad)
+/* Gamma contribution to the gradient (plus the Gamma-derivative term if
+ * dftb->sccmode == 3); grad[atom] is overwritten for each of the nn atoms. */
+{
+	dftb_phase1_t dftb1 = dftb->phase1;
+	int nn = dftb1.nn;
+	int *izp = dftb1.izp;
+	int *izpxh = dftb1.izpxh;
+	double *uhubb = dftb->uhubb1;
+	double *uhder = dftb->uhder1;
+	double *qmat = dftb1.qmat;
+	double *qzero = dftb->qzero1;
+	double zeta = dftb->zeta1;
+	int dim, atom, other, use_h;
+	dvec acc2, acc3, sep;
+	double dist, w2, w3;
+	double dq_atom, dq_other;	/* net charges: qmat minus qzero of the atom type */
+	double dg_ao, dG_ao;		/* gammaall1 output for the (atom, other) ordering */
+	double dg_oa, dG_oa;		/* gammaall1 output for the (other, atom) ordering */
+
+	for (atom = 0; atom < nn; atom++)
+		clear_dvec(grad[atom]);
+
+	/* dgamma/dr and dGamma/dr are turned into Cartesian contributions
+	 * right away, pair by pair, instead of being stored in matrices first */
+	for (atom = 0; atom < nn; atom++) {
+		clear_dvec(acc2);
+		clear_dvec(acc3);
+		dq_atom = qmat[atom] - qzero[izp[atom]];
+		for (other = 0; other < nn; other++) {
+			if (other == atom)
+				continue;
+			dvec_sub(x[atom], x[other], sep);
+			dist     = dnorm(sep);
+			use_h    = izpxh[atom] || izpxh[other];
+			dq_other = qmat[other] - qzero[izp[other]];
+			gammaall1(dist, uhubb[izp[atom]], uhubb[izp[other]], uhder[izp[atom]], use_h, zeta, &dg_ao, &dG_ao);
+			gammaall1(dist, uhubb[izp[other]], uhubb[izp[atom]], uhder[izp[other]], use_h, zeta, &dg_oa, &dG_oa);
+			/* factors shared by all three Cartesian components of this pair */
+			w2 = dq_other * (dg_oa + dg_ao) / dist;
+			w3 = dq_other * (dq_other*dG_oa + dq_atom*dG_ao) / dist;
+			for (dim = 0; dim < 3; dim++) {
+				acc2[dim] += w2 * sep[dim];
+				acc3[dim] += w3 * sep[dim];
+			}
+		}
+		for (dim = 0; dim < 3; dim++) {
+			if (dftb->sccmode == 3)
+				grad[atom][dim] = dq_atom * (0.5*acc2[dim] + acc3[dim]/3.);
+			else
+				grad[atom][dim] = dq_atom * 0.5 * acc2[dim];
+		}
+	}
+}
+
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb.h gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb.h
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb.h	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb.h	2014-10-13 16:31:52.000000000 +0200
@@ -0,0 +1,89 @@
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <signal.h>
+#include <stdlib.h>
+
+#include <string.h>
+#include "typedefs.h"
+#include "gromacs/utility/smalloc.h"
+#include "sysstuff.h"
+#include "vec.h"
+//#include "statutil.h"
+#include "vcm.h"
+#include "mdebin.h"
+#include "nrnb.h"
+#include "calcmu.h"
+#include "index.h"
+#include "vsite.h"
+#include "update.h"
+#include "ns.h"
+//#include "trnio.h"
+//#include "xtcio.h"
+#include "mdrun.h"
+//#include "confio.h"
+#include "network.h"
+//#include "pull.h"
+#include "xvgr.h"
+#include "physics.h"
+#include "names.h"
+//#include "xmdrun.h"
+//#include "ionize.h"
+#include "disre.h"
+#include "orires.h"
+//#include "dihre.h"
+//#include "pppm.h"
+#include "pme.h"
+#include "mdatoms.h"
+//#include "repl_ex.h"
+#include "qmmm.h"
+//#include "mpelogging.h"
+#include "domdec.h"
+//#include "partdec.h"
+//#include "topsort.h"
+#include "coulomb.h"
+//#include "constr.h"
+#include "shellfc.h"
+//#include "compute_io.h"
+#include "mvdata.h"
+#include "checkpoint.h"
+//#include "mtop_util.h"
+
+/*
+#ifdef GMX_MPI
+#include <mpi.h>
+#endif
+*/
+
+#define MAXITER_DIIS (200)
+#define MAXITER_SCC (70)
+// conversion factors; HARTREE2KJ and AVOGADRO are not defined in this header (presumably physics.h provides them)
+#define NM_TO_BOHR (18.897259886)
+#define HARTREE_TO_EV (27.211396132)
+#define AU_OF_ESP_TO_VOLT (14.400)
+#define HARTREE_TO_KJMOL (HARTREE2KJ*AVOGADRO)
+#define KJMOL_TO_HARTREE (1/HARTREE_TO_KJMOL)
+
+#define BROYDEN_ALMIX (0.2)
+#define BROYDEN_SCFTOL (1.e-6)
+#define FERMI_KT (9.5004455e-4) // kT in hartree units, at 300 K
+// #define FERMI_KT (4.75e-4) // kT in hartree units, at 150 K
+#define FERMI_CONVERG (1.e-12) // 1.e-9 kT in hartree units (at 300 K)
+#define SIMPLE_ALMIX (0.01)
+#define ALMIX_ATTENUATOR (0.9)
+
+#define QMMM_DFTB_SWITCH (0.2) // length of the additional switching region beyond cutoff
+#define QMMM_DFTB_LIST   (0.1) // length of the additional buffer for neighborsearching beyond cutoff+switch
+// power helpers: the argument is expanded several times, so it must be free of side effects
+#define SQR(x) ((x)*(x))
+#define CUB(x) ((x)*(x)*(x))
+#define QRT(x) ((x)*(x)*(x)*(x))
+#define HEX(x) ((x)*(x)*(x)*(x)*(x)*(x))
+#define OCT(x) ((x)*(x)*(x)*(x)*(x)*(x)*(x)*(x))
+#define CHOOSE2(x) ((x)*((x)+1)/2) // x*(x+1)/2; the inner x is parenthesized too, so that e.g. CHOOSE2(a<<1) expands as intended
+// charge of QM atom x as qzero1[type] - qmat[x]; expands to the identifiers dftb1 and dftb, which must be in scope at the point of use
+#define QM_CHARGE(x) (-dftb1.qmat[(x)] + dftb->qzero1[dftb1.izp[(x)]])
+
+#include "qm_dftb_declarations.h"
+
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_levmar.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_levmar.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_levmar.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_levmar.c	2015-02-10 16:52:50.519509529 +0100
@@ -0,0 +1,1734 @@
+/////////////////////////////////////////////////////////////////////////////////
+// 
+//  Solution of linear systems involved in the Levenberg - Marquardt
+//  minimization algorithm
+//  Copyright (C) 2004  Manolis Lourakis (lourakis at ics forth gr)
+//  Institute of Computer Science, Foundation for Research & Technology - Hellas
+//  Heraklion, Crete, Greece.
+//
+//  This program is free software; you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation; either version 2 of the License, or
+//  (at your option) any later version.
+//
+//  This program is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//
+/////////////////////////////////////////////////////////////////////////////////
+
+/******************************************************************************** 
+ * LAPACK-based implementations for various linear system solvers. The same core
+ * code is used with appropriate #defines to derive single and double precision
+ * solver versions, see also Axb_core.c
+ ********************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <float.h>
+
+#include "qm_dftb_levmar.h"
+
+/* double precision definitions (consumed by the solver and helper code below) */
+#define LM_REAL double
+#define LM_PREFIX d
+#define LM_CNST(x) (x)
+#define LM_REAL_EPSILON DBL_EPSILON
+#define LM_REAL_MAX DBL_MAX
+#define LM_REAL_MIN (-DBL_MAX) /* parenthesized so the expansion stays a single operand in any expression */
+
+/////////////////////////////////////////////////////////////////////////////////
+// 
+//  Solution of linear systems involved in the Levenberg - Marquardt
+//  minimization algorithm
+//  Copyright (C) 2004  Manolis Lourakis (lourakis at ics forth gr)
+//  Institute of Computer Science, Foundation for Research & Technology - Hellas
+//  Heraklion, Crete, Greece.
+//
+//  This program is free software; you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation; either version 2 of the License, or
+//  (at your option) any later version.
+//
+//  This program is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//
+/////////////////////////////////////////////////////////////////////////////////
+
+
+/* Solvers for the linear systems Ax=b. Solvers should NOT modify their A & B arguments! */
+
+#ifdef LINSOLVERS_RETAIN_MEMORY /* solver scratch buffers declared __STATIC__ keep their value between calls */
+#define __STATIC__ static
+#else /* scratch buffers are ordinary locals: allocated and free'd inside every call */
+#define __STATIC__ // expands to nothing
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+
+/* Generic names of the LAPACK routines used by the solvers below. */
+/* LM_MK_LAPACK_NAME is not defined in this file; presumably it comes from qm_dftb_levmar.h -- confirm. */
+#define GEQRF LM_MK_LAPACK_NAME(geqrf)
+#define ORGQR LM_MK_LAPACK_NAME(orgqr)
+#define TRTRS LM_MK_LAPACK_NAME(trtrs)
+#define POTF2 LM_MK_LAPACK_NAME(potf2)
+#define POTRF LM_MK_LAPACK_NAME(potrf)
+#define POTRS LM_MK_LAPACK_NAME(potrs)
+#define GETRF LM_MK_LAPACK_NAME(getrf)
+#define GETRS LM_MK_LAPACK_NAME(getrs)
+#define GESVD LM_MK_LAPACK_NAME(gesvd)
+#define GESDD LM_MK_LAPACK_NAME(gesdd)
+#define SYTRF LM_MK_LAPACK_NAME(sytrf)
+#define SYTRS LM_MK_LAPACK_NAME(sytrs)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* QR decomposition; note that every argument of the routines declared here is passed by address */
+extern int GEQRF(int *m, int *n, LM_REAL *a, int *lda, LM_REAL *tau, LM_REAL *work, int *lwork, int *info);
+extern int ORGQR(int *m, int *n, int *k, LM_REAL *a, int *lda, LM_REAL *tau, LM_REAL *work, int *lwork, int *info);
+
+/* solution of triangular systems */
+extern int TRTRS(char *uplo, char *trans, char *diag, int *n, int *nrhs, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, int *info);
+
+/* Cholesky decomposition and systems solution */
+extern int POTF2(char *uplo, int *n, LM_REAL *a, int *lda, int *info);
+extern int POTRF(char *uplo, int *n, LM_REAL *a, int *lda, int *info); /* block version of dpotf2 */
+extern int POTRS(char *uplo, int *n, int *nrhs, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, int *info);
+
+/* LU decomposition and systems solution */
+extern int GETRF(int *m, int *n, LM_REAL *a, int *lda, int *ipiv, int *info);
+extern int GETRS(char *trans, int *n, int *nrhs, LM_REAL *a, int *lda, int *ipiv, LM_REAL *b, int *ldb, int *info);
+
+/* Singular Value Decomposition (SVD) */
+extern int GESVD(char *jobu, char *jobvt, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu,
+                   LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *info);
+
+/* lapack 3.0 new SVD routine, faster than xgesvd().
+ * If your version of LAPACK does not provide it, use the older GESVD declared above instead.
+ */
+extern int GESDD(char *jobz, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt,
+                   LM_REAL *work, int *lwork, int *iwork, int *info);
+
+/* LDLt/UDUt factorization and systems solution */
+extern int SYTRF(char *uplo, int *n, LM_REAL *a, int *lda, int *ipiv, LM_REAL *work, int *lwork, int *info);
+extern int SYTRS(char *uplo, int *n, int *nrhs, LM_REAL *a, int *lda, int *ipiv, LM_REAL *b, int *ldb, int *info);
+#ifdef __cplusplus
+}
+#endif
+
+/* precision-specific definitions: LM_ADD_PREFIX (not defined in this file) derives the actual solver names */
+#define AX_EQ_B_QR LM_ADD_PREFIX(Ax_eq_b_QR) /* QR with explicit Q */
+#define AX_EQ_B_QRLS LM_ADD_PREFIX(Ax_eq_b_QRLS) /* least squares through QR */
+#define AX_EQ_B_CHOL LM_ADD_PREFIX(Ax_eq_b_Chol) /* Cholesky */
+#define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU) /* LU */
+#define AX_EQ_B_SVD LM_ADD_PREFIX(Ax_eq_b_SVD) /* SVD-based pseudoinverse */
+#define AX_EQ_B_BK LM_ADD_PREFIX(Ax_eq_b_BK) /* Bunch-Kaufman LDLt */
+
+/*
+ * This function returns the solution of Ax = b
+ *
+ * The function is based on QR decomposition with explicit computation of Q:
+ * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes
+ * Q R x = b or R x = Q^T b.
+ * The last equation can be solved directly.
+ *
+ * A is mxm, b is mx1
+ *
+ * The function returns 0 in case of error, 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int AX_EQ_B_QR(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
+{
+__STATIC__ LM_REAL *buf=NULL; /* one allocation, split below into a, tau, r and work */
+__STATIC__ int buf_sz=0; /* size of buf, in LM_REAL elements */
+/* nb: GEQRF's optimal work size divided by m; filled in by a workspace query while it is still 0 */
+static int nb=0; /* no __STATIC__ decl. here! */
+
+LM_REAL *a, *tau, *r, *work;
+int a_sz, tau_sz, r_sz, tot_sz;
+register int i, j;
+int info, worksz, nrhs=1;
+register LM_REAL sum;
+
+    if(!A) /* NULL A: release the retained buffer (when compiled in) and report success */
+#ifdef LINSOLVERS_RETAIN_MEMORY
+    {
+      if(buf) free(buf);
+      buf=NULL;
+      buf_sz=0;
+
+      return 1;
+    }
+#else
+      return 1; /* NOP */
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+   
+    /* calculate required memory size (all sizes are in LM_REAL elements) */
+    a_sz=m*m;
+    tau_sz=m;
+    r_sz=m*m; /* only the upper triangular part really needed */
+    if(!nb){
+      LM_REAL tmp;
+
+      worksz=-1; // workspace query; optimal size is returned in tmp
+      GEQRF((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&worksz, (int *)&info); /* NOTE(review): info from the query is not checked */
+      nb=((int)tmp)/m; // optimal worksize is m*nb; NOTE(review): m==0 here would divide by zero
+    }
+    worksz=nb*m;
+    tot_sz=a_sz + tau_sz + r_sz + worksz;
+
+#ifdef LINSOLVERS_RETAIN_MEMORY
+    if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
+      if(buf) free(buf); /* free previously allocated memory */
+
+      buf_sz=tot_sz;
+      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
+      if(!buf){
+        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n");
+        exit(1); /* allocation failure terminates the process */
+      }
+    }
+#else
+      buf_sz=tot_sz;
+      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
+      if(!buf){
+        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n");
+        exit(1); /* allocation failure terminates the process */
+      }
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+
+    a=buf; /* mxm copy of A; overwritten by the factorization and later by Q */
+    tau=a+a_sz; /* m values handed from GEQRF to ORGQR */
+    r=tau+tau_sz; /* copy of the factored matrix, taken before ORGQR overwrites a */
+    work=r+r_sz; /* LAPACK workspace of worksz elements */
+
+  /* copy A, read in row-major order, into a in column-major order */
+	for(i=0; i<m; i++)
+		for(j=0; j<m; j++)
+			a[i+j*m]=A[i*m+j];
+
+  /* QR decomposition of A */
+  GEQRF((int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info);
+  /* error treatment: negative info exits the process, positive info makes the function return 0 */
+  if(info!=0){
+    if(info<0){
+      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQRF) " in ", AX_EQ_B_QR) "()\n", -info);
+      exit(1);
+    }
+    else{
+      fprintf(stderr, RCAT(RCAT("Unknown LAPACK error %d for ", GEQRF) " in ", AX_EQ_B_QR) "()\n", info);
+#ifndef LINSOLVERS_RETAIN_MEMORY
+      free(buf);
+#endif
+
+      return 0;
+    }
+  }
+
+  /* R is stored in the upper triangular part of a; copy it in r so that ORGQR() below won't destroy it */
+  memcpy(r, a, r_sz*sizeof(LM_REAL));
+
+  /* compute Q using the elementary reflectors computed by the above decomposition */
+  ORGQR((int *)&m, (int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info);
+  if(info!=0){
+    if(info<0){
+      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", ORGQR) " in ", AX_EQ_B_QR) "()\n", -info);
+      exit(1);
+    }
+    else{
+      fprintf(stderr, RCAT("Unknown LAPACK error (%d) in ", AX_EQ_B_QR) "()\n", info);
+#ifndef LINSOLVERS_RETAIN_MEMORY
+      free(buf);
+#endif
+
+      return 0;
+    }
+  }
+
+  /* Q is now in a (column major, so a[i*m+j] is element (j,i) of Q); compute Q^T b in x */
+  for(i=0; i<m; i++){
+    for(j=0, sum=0.0; j<m; j++)
+      sum+=a[i*m+j]*B[j];
+    x[i]=sum;
+  }
+
+  /* solve the linear system R x = Q^t b; x holds the right-hand side on entry and the solution on exit */
+  TRTRS("U", "N", "N", (int *)&m, (int *)&nrhs, r, (int *)&m, x, (int *)&m, &info);
+  /* error treatment */
+  if(info!=0){
+    if(info<0){
+      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QR) "()\n", -info);
+      exit(1);
+    }
+    else{
+      fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QR) "()\n", info);
+#ifndef LINSOLVERS_RETAIN_MEMORY
+      free(buf);
+#endif
+
+      return 0;
+    }
+  }
+
+#ifndef LINSOLVERS_RETAIN_MEMORY
+  free(buf);
+#endif
+
+	return 1;
+}
+
+/*
+ * This function returns the solution of min_x ||Ax - b||
+ *
+ * || . || is the second order (i.e. L2) norm. This is a least squares technique that
+ * is based on QR decomposition:
+ * If A=Q R with Q orthogonal and R upper triangular, the normal equations become
+ * (A^T A) x = A^T b  or (R^T Q^T Q R) x = A^T b or (R^T R) x = A^T b.
+ * This amounts to solving R^T y = A^T b for y and then R x = y for x
+ * Note that Q does not need to be explicitly computed
+ *
+ * A is mxn, b is mx1
+ *
+ * The function returns 0 in case of error, 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int AX_EQ_B_QRLS(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m, int n)
+{
+__STATIC__ LM_REAL *buf=NULL; /* one allocation, split below into a, tau, r and work */
+__STATIC__ int buf_sz=0; /* size of buf, in LM_REAL elements */
+
+static int nb=0; /* no __STATIC__ decl. here! */
+
+LM_REAL *a, *tau, *r, *work;
+int a_sz, tau_sz, r_sz, tot_sz;
+register int i, j;
+int info, worksz, nrhs=1;
+register LM_REAL sum;
+   
+    if(!A) /* NULL A: release the retained buffer (when compiled in) and report success */
+#ifdef LINSOLVERS_RETAIN_MEMORY
+    {
+      if(buf) free(buf);
+      buf=NULL;
+      buf_sz=0;
+
+      return 1;
+    }
+#else
+      return 1; /* NOP */
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+   
+    if(m<n){ /* fewer rows than columns is rejected by terminating the process */
+		  fprintf(stderr, RCAT("Normal equations require that the number of rows is greater than number of columns in ", AX_EQ_B_QRLS) "() [%d x %d]! -- try transposing\n", m, n);
+		  exit(1);
+	  }
+      
+    /* calculate required memory size (all sizes are in LM_REAL elements) */
+    a_sz=m*n;
+    tau_sz=n;
+    r_sz=n*n;
+    if(!nb){
+      LM_REAL tmp;
+
+      worksz=-1; // workspace query; optimal size is returned in tmp
+      GEQRF((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&worksz, (int *)&info); /* NOTE(review): queried for m x m although A is m x n; info is not checked */
+      nb=((int)tmp)/m; // optimal worksize is m*nb; NOTE(review): m==0 here would divide by zero
+    }
+    worksz=nb*m;
+    tot_sz=a_sz + tau_sz + r_sz + worksz;
+
+#ifdef LINSOLVERS_RETAIN_MEMORY
+    if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
+      if(buf) free(buf); /* free previously allocated memory */
+
+      buf_sz=tot_sz;
+      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
+      if(!buf){
+        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n");
+        exit(1); /* allocation failure terminates the process */
+      }
+    }
+#else
+      buf_sz=tot_sz;
+      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
+      if(!buf){
+        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n");
+        exit(1); /* allocation failure terminates the process */
+      }
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+
+    a=buf; /* mxn copy of A, overwritten by the factorization */
+    tau=a+a_sz; /* n values written by GEQRF */
+    r=tau+tau_sz; /* nxn upper triangular factor extracted from a */
+    work=r+r_sz; /* LAPACK workspace of worksz elements */
+
+  /* copy A, read in row-major order, into a in column-major order */
+	for(i=0; i<m; i++)
+		for(j=0; j<n; j++)
+			a[i+j*m]=A[i*n+j];
+
+  /* compute A^T b in x (uses the caller's A directly, not the copy) */
+  for(i=0; i<n; i++){
+    for(j=0, sum=0.0; j<m; j++)
+      sum+=A[j*n+i]*B[j];
+    x[i]=sum;
+  }
+
+  /* QR decomposition of A */
+  GEQRF((int *)&m, (int *)&n, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info);
+  /* error treatment: negative info exits the process, positive info makes the function return 0 */
+  if(info!=0){
+    if(info<0){
+      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQRF) " in ", AX_EQ_B_QRLS) "()\n", -info);
+      exit(1);
+    }
+    else{
+      fprintf(stderr, RCAT(RCAT("Unknown LAPACK error %d for ", GEQRF) " in ", AX_EQ_B_QRLS) "()\n", info);
+#ifndef LINSOLVERS_RETAIN_MEMORY
+      free(buf);
+#endif
+
+      return 0;
+    }
+  }
+
+  /* R is stored in the upper triangular part of a. Note that a is mxn (leading dimension m) while r is nxn */
+  for(j=0; j<n; j++){
+    for(i=0; i<=j; i++)
+      r[i+j*n]=a[i+j*m];
+
+    /* lower part is zero */
+    for(i=j+1; i<n; i++)
+      r[i+j*n]=0.0;
+  }
+
+  /* solve the linear system R^T y = A^t b; x holds A^t b on entry and y on exit */
+  TRTRS("U", "T", "N", (int *)&n, (int *)&nrhs, r, (int *)&n, x, (int *)&n, &info);
+  /* error treatment */
+  if(info!=0){
+    if(info<0){
+      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QRLS) "()\n", -info);
+      exit(1);
+    }
+    else{
+      fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QRLS) "()\n", info);
+#ifndef LINSOLVERS_RETAIN_MEMORY
+      free(buf);
+#endif
+
+      return 0;
+    }
+  }
+
+  /* solve the linear system R x = y; x holds y on entry and the solution on exit */
+  TRTRS("U", "N", "N", (int *)&n, (int *)&nrhs, r, (int *)&n, x, (int *)&n, &info);
+  /* error treatment */
+  if(info!=0){
+    if(info<0){
+      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QRLS) "()\n", -info);
+      exit(1);
+    }
+    else{
+      fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QRLS) "()\n", info);
+#ifndef LINSOLVERS_RETAIN_MEMORY
+      free(buf);
+#endif
+
+      return 0;
+    }
+  }
+
+#ifndef LINSOLVERS_RETAIN_MEMORY
+  free(buf);
+#endif
+
+	return 1;
+}
+
+/*
+ * This function returns the solution of Ax=b
+ *
+ * The function assumes that A is symmetric & positive definite and employs
+ * the Cholesky decomposition:
+ * If A=L L^T with L lower triangular, the system to be solved becomes
+ * (L L^T) x = b
+ * This amounts to solving L y = b for y and then L^T x = y for x
+ *
+ * A is mxm, b is mx1
+ *
+ * The function returns 0 in case of error, 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int AX_EQ_B_CHOL(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
+{
+/* Scratch storage holding the copy of A that gets factored in place. It
+ * survives the call only when LINSOLVERS_RETAIN_MEMORY turns __STATIC__
+ * into `static`; otherwise it is allocated and released on every call.
+ */
+__STATIC__ LM_REAL *buf=NULL;
+__STATIC__ int buf_sz=0;
+
+LM_REAL *factor;
+int n_elems, needed;
+int status, one_rhs=1;
+
+  /* a NULL matrix asks for the retained scratch storage (if any) to be dropped */
+  if(A==NULL){
+#ifdef LINSOLVERS_RETAIN_MEMORY
+    free(buf);
+    buf=NULL;
+    buf_sz=0;
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+
+    return 1;
+  }
+
+  /* only an mxm copy of A is needed, counted in LM_REAL elements */
+  n_elems=m*m;
+  needed=n_elems;
+
+#ifdef LINSOLVERS_RETAIN_MEMORY
+  /* keep the buffer from the previous call unless it is too small */
+  if(buf_sz<needed){
+    free(buf);
+
+    buf_sz=needed;
+    buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
+    if(buf==NULL){
+      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n");
+      exit(1);
+    }
+  }
+#else
+  buf_sz=needed;
+  buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
+  if(buf==NULL){
+    fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n");
+    exit(1);
+  }
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+
+  factor=buf;
+
+  /* Work on copies so that A and B stay untouched: A goes to the scratch
+   * buffer as is (it is assumed symmetric, so no transposition is needed)
+   * and B goes to x, which the solve below overwrites with the solution.
+   */
+  memcpy(factor, A, n_elems*sizeof(LM_REAL));
+  memcpy(x, B, m*sizeof(LM_REAL));
+
+  /* Cholesky decomposition of A; POTRF is the routine called, the messages name POTF2 alongside it */
+  POTRF("L", &m, factor, &m, &status);
+  if(status<0){ /* an illegal argument is treated as fatal */
+    fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTF2) "/", POTRF) " in ",
+                    AX_EQ_B_CHOL) "()\n", -status);
+    exit(1);
+  }
+  if(status>0){ /* factorization could not be completed: report failure to the caller */
+    fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for ", POTF2) "/", POTRF) " in ", AX_EQ_B_CHOL) "()\n", status);
+#ifndef LINSOLVERS_RETAIN_MEMORY
+    free(buf);
+#endif
+
+    return 0;
+  }
+
+  /* solve using the computed factor in a single LAPACK call */
+  POTRS("L", &m, &one_rhs, factor, &m, x, &m, &status);
+  if(status<0){
+    fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTRS) " in ", AX_EQ_B_CHOL) "()\n", -status);
+    exit(1);
+  }
+
+#ifndef LINSOLVERS_RETAIN_MEMORY
+  free(buf);
+#endif
+
+  return 1;
+}
+
+/*
+ * This function returns the solution of Ax = b
+ *
+ * The function employs LU decomposition:
+ * If A=L U with L lower and U upper triangular, then the original system
+ * amounts to solving
+ * L y = b, U x = y
+ *
+ * A is mxm, b is mx1
+ *
+ * The function returns 0 in case of error, 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
+{
+__STATIC__ LM_REAL *buf=NULL; /* holds the mxm copy of A followed by the m pivot indices */
+__STATIC__ int buf_sz=0; /* size of buf in bytes */
+
+int a_sz, ipiv_sz, tot_sz;
+register int i, j;
+int info, *ipiv, nrhs=1;
+LM_REAL *a;
+   
+    if(!A) /* NULL A: release the retained buffer (when compiled in) and report success */
+#ifdef LINSOLVERS_RETAIN_MEMORY
+    {
+      if(buf) free(buf);
+      buf=NULL;
+      buf_sz=0;
+
+      return 1;
+    }
+#else
+      return 1; /* NOP */
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+   
+    /* calculate required memory size; a_sz and ipiv_sz are element counts, tot_sz is in bytes */
+    ipiv_sz=m;
+    a_sz=m*m;
+    tot_sz=a_sz*sizeof(LM_REAL) + ipiv_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
+
+#ifdef LINSOLVERS_RETAIN_MEMORY
+    if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
+      if(buf) free(buf); /* free previously allocated memory */
+
+      buf_sz=tot_sz;
+      buf=(LM_REAL *)malloc(buf_sz);
+      if(!buf){
+        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
+        exit(1); /* allocation failure terminates the process */
+      }
+    }
+#else
+      buf_sz=tot_sz;
+      buf=(LM_REAL *)malloc(buf_sz);
+      if(!buf){
+        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
+        exit(1); /* allocation failure terminates the process */
+      }
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+
+    a=buf; /* mxm copy of A, overwritten by the LU factors */
+    ipiv=(int *)(a+a_sz); /* pivot indices live right behind the matrix copy */
+
+    /* copy A, read in row-major order, into a in column-major order, and B into x */
+	  for(i=0; i<m; i++){
+		  for(j=0; j<m; j++)
+        a[i+j*m]=A[i*m+j];
+
+      x[i]=B[i];
+    }
+
+  /* LU decomposition for A; negative info exits the process, positive info makes the function return 0 */
+	GETRF((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info);  
+	if(info!=0){
+		if(info<0){
+      fprintf(stderr, RCAT(RCAT("argument %d of ", GETRF) " illegal in ", AX_EQ_B_LU) "()\n", -info);
+			exit(1);
+		}
+		else{
+      fprintf(stderr, RCAT(RCAT("singular matrix A for ", GETRF) " in ", AX_EQ_B_LU) "()\n");
+#ifndef LINSOLVERS_RETAIN_MEMORY
+      free(buf);
+#endif
+
+			return 0;
+		}
+	}
+
+  /* solve the system with the computed LU; x holds b on entry and the solution on exit */
+  GETRS("N", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info);
+	if(info!=0){
+		if(info<0){
+			fprintf(stderr, RCAT(RCAT("argument %d of ", GETRS) " illegal in ", AX_EQ_B_LU) "()\n", -info);
+			exit(1);
+		}
+		else{
+			fprintf(stderr, RCAT(RCAT("unknown error for ", GETRS) " in ", AX_EQ_B_LU) "()\n");
+#ifndef LINSOLVERS_RETAIN_MEMORY
+      free(buf);
+#endif
+
+			return 0;
+		}
+	}
+
+#ifndef LINSOLVERS_RETAIN_MEMORY
+  free(buf);
+#endif
+
+	return 1;
+}
+
+/*
+ * This function returns the solution of Ax = b
+ *
+ * The function is based on SVD decomposition:
+ * If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes
+ * (U D V^T) x = b or x=V D^{-1} U^T b
+ * Note that V D^{-1} U^T is the pseudoinverse A^+
+ *
+ * A is mxm, b is mx1.
+ *
+ * The function returns 0 in case of error, 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int AX_EQ_B_SVD(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
+{
+__STATIC__ LM_REAL *buf=NULL; /* one allocation, split below into a, u, s, vt, work and iwork */
+__STATIC__ int buf_sz=0; /* size of buf in bytes */
+static LM_REAL eps=LM_CNST(-1.0); /* machine epsilon; stays negative until it is computed further down */
+
+register int i, j;
+LM_REAL *a, *u, *s, *vt, *work;
+int a_sz, u_sz, s_sz, vt_sz, tot_sz;
+LM_REAL thresh, one_over_denom;
+register LM_REAL sum;
+int info, rank, worksz, *iwork, iworksz; /* iwork is only passed to the commented-out GESDD call */
+   
+    if(!A) /* NULL A: release the retained buffer (when compiled in) and report success */
+#ifdef LINSOLVERS_RETAIN_MEMORY
+    {
+      if(buf) free(buf);
+      buf=NULL;
+      buf_sz=0;
+
+      return 1;
+    }
+#else
+      return 1; /* NOP */
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+   
+  /* calculate required memory size */
+  worksz=-1; // workspace query. Keep in mind that GESDD requires more memory than GESVD
+  /* note that optimal work size is returned in thresh (NOTE(review): info from the query is not checked) */
+  GESVD("A", "A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (LM_REAL *)&thresh, (int *)&worksz, &info);
+  //GESDD("A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (LM_REAL *)&thresh, (int *)&worksz, NULL, &info);
+  worksz=(int)thresh;
+  iworksz=8*m;
+  a_sz=m*m;
+  u_sz=m*m; s_sz=m; vt_sz=m*m;
+
+  tot_sz=(a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL) + iworksz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
+
+#ifdef LINSOLVERS_RETAIN_MEMORY
+  if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
+    if(buf) free(buf); /* free previously allocated memory */
+
+    buf_sz=tot_sz;
+    buf=(LM_REAL *)malloc(buf_sz);
+    if(!buf){
+      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
+      exit(1); /* allocation failure terminates the process */
+    }
+  }
+#else
+    buf_sz=tot_sz;
+    buf=(LM_REAL *)malloc(buf_sz);
+    if(!buf){
+      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
+      exit(1); /* allocation failure terminates the process */
+    }
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+
+  a=buf; /* mxm copy of A; reused further down to hold the pseudoinverse */
+  u=a+a_sz; /* mxm, filled by GESVD */
+  s=u+u_sz; /* m singular values */
+  vt=s+s_sz; /* mxm, filled by GESVD */
+  work=vt+vt_sz; /* LAPACK workspace of worksz elements */
+  iwork=(int *)(work+worksz);
+
+  /* copy A, read in row-major order, into a in column-major order */
+  for(i=0; i<m; i++)
+    for(j=0; j<m; j++)
+      a[i+j*m]=A[i*m+j];
+
+  /* SVD decomposition of A */
+  GESVD("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, &info);
+  //GESDD("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info);
+
+  /* error treatment; routine names are joined as GESVD "/" GESDD, the same form the Cholesky solver uses */
+  if(info!=0){
+    if(info<0){
+      fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD) "/", GESDD) " in ", AX_EQ_B_SVD) "()\n", -info);
+      exit(1);
+    }
+    else{
+      fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", AX_EQ_B_SVD) "() [info=%d]\n", info);
+#ifndef LINSOLVERS_RETAIN_MEMORY
+      free(buf);
+#endif
+
+      return 0;
+    }
+  }
+
+  if(eps<0.0){ /* eps is static, so this runs only until a value has been stored */
+    LM_REAL aux;
+
+    /* compute machine epsilon: halve eps until adding it to 1 no longer changes the sum, then undo one halving */
+    for(eps=LM_CNST(1.0); aux=eps+LM_CNST(1.0), aux-LM_CNST(1.0)>0.0; eps*=LM_CNST(0.5))
+                                          ;
+    eps*=LM_CNST(2.0);
+  }
+
+  /* compute the pseudoinverse in a (row major); the loop stops at the first singular value not exceeding thresh */
+	for(i=0; i<a_sz; i++) a[i]=0.0; /* initialize to zero */
+  for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){
+    one_over_denom=LM_CNST(1.0)/s[rank];
+
+    for(j=0; j<m; j++)
+      for(i=0; i<m; i++)
+        a[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom;
+  }
+
+	/* compute A^+ b in x */
+	for(i=0; i<m; i++){
+	  for(j=0, sum=0.0; j<m; j++)
+      sum+=a[i*m+j]*B[j];
+    x[i]=sum;
+  }
+
+#ifndef LINSOLVERS_RETAIN_MEMORY
+  free(buf);
+#endif
+
+	return 1;
+}
+
+/*
+ * This function returns the solution of Ax = b for a real symmetric matrix A
+ *
+ * The function is based on LDLT factorization with the pivoting
+ * strategy of Bunch and Kaufman:
+ * A is factored as L*D*L^T where L is lower triangular and
+ * D symmetric and block diagonal (aka spectral decomposition,
+ * Banachiewicz factorization, modified Cholesky factorization)
+ *
+ * A is mxm, b is mx1.
+ *
+ * The function returns 0 in case of error, 1 if successful
+ *
+ * This function is often called repetitively to solve problems of identical
+ * dimensions. To avoid repetitive malloc's and free's, allocated memory is
+ * retained between calls and free'd-malloc'ed when not of the appropriate size.
+ * A call with NULL as the first argument forces this memory to be released.
+ */
+int AX_EQ_B_BK(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
+{
+__STATIC__ LM_REAL *buf=NULL; /* one allocation, split below into a, work and ipiv */
+__STATIC__ int buf_sz=0, nb=0; /* buf_sz is in bytes; nb is SYTRF's optimal work size divided by m */
+
+LM_REAL *a, *work;
+int a_sz, ipiv_sz, work_sz, tot_sz;
+int info, *ipiv, nrhs=1;
+   
+  if(!A) /* NULL A: release the retained buffer (when compiled in) and report success */
+#ifdef LINSOLVERS_RETAIN_MEMORY
+  {
+    if(buf) free(buf);
+    buf=NULL;
+    buf_sz=0;
+
+    return 1;
+  }
+#else
+  return 1; /* NOP */
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+
+  /* calculate required memory size; the *_sz values are element counts, tot_sz is in bytes */
+  ipiv_sz=m;
+  a_sz=m*m;
+  if(!nb){
+    LM_REAL tmp;
+
+    work_sz=-1; // workspace query; optimal size is returned in tmp
+    SYTRF("L", (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&work_sz, (int *)&info); /* NOTE(review): info from the query is not checked */
+    nb=((int)tmp)/m; // optimal worksize is m*nb; NOTE(review): m==0 here would divide by zero
+  }
+  work_sz=(nb!=-1)? nb*m : 1;
+  tot_sz=(a_sz + work_sz)*sizeof(LM_REAL) + ipiv_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
+
+#ifdef LINSOLVERS_RETAIN_MEMORY
+  if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
+    if(buf) free(buf); /* free previously allocated memory */
+
+    buf_sz=tot_sz;
+    buf=(LM_REAL *)malloc(buf_sz);
+    if(!buf){
+      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_BK) "() failed!\n");
+      exit(1); /* allocation failure terminates the process */
+    }
+  }
+#else
+  buf_sz=tot_sz;
+  buf=(LM_REAL *)malloc(buf_sz);
+  if(!buf){
+    fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_BK) "() failed!\n");
+    exit(1); /* allocation failure terminates the process */
+  }
+#endif /* LINSOLVERS_RETAIN_MEMORY */
+
+  a=buf; /* mxm copy of A, overwritten by the factorization */
+  work=a+a_sz; /* LAPACK workspace of work_sz elements */
+  ipiv=(int *)(work+work_sz); /* pivot indices live behind the doubles */
+
+  /* store A into a and B into x; A is assumed to be symmetric, hence
+   * the column and row major order representations are the same
+   */
+  memcpy(a, A, a_sz*sizeof(LM_REAL));
+  memcpy(x, B, m*sizeof(LM_REAL));
+
+  /* LDLt factorization for A; negative info exits the process, positive info makes the function return 0 */
+	SYTRF("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info);
+	if(info!=0){
+		if(info<0){
+      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", SYTRF) " in ", AX_EQ_B_BK) "()\n", -info);
+			exit(1);
+		}
+		else{
+      fprintf(stderr, RCAT(RCAT("LAPACK error: singular block diagonal matrix D for ", SYTRF) " in ", AX_EQ_B_BK)"() [D(%d, %d) is zero]\n", info, info);
+#ifndef LINSOLVERS_RETAIN_MEMORY
+      free(buf);
+#endif
+
+			return 0;
+		}
+	}
+
+  /* solve the system with the computed factorization; x holds b on entry and the solution on exit */
+  SYTRS("L", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info);
+  if(info<0){
+    fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", SYTRS) " in ", AX_EQ_B_BK) "()\n", -info);
+    exit(1);
+	}
+
+#ifndef LINSOLVERS_RETAIN_MEMORY
+  free(buf);
+#endif
+
+	return 1;
+}
+
+/* undefine all solver and LAPACK name macros defined above. IT MUST REMAIN IN THIS POSITION IN FILE */
+#undef AX_EQ_B_QR
+#undef AX_EQ_B_QRLS
+#undef AX_EQ_B_CHOL
+#undef AX_EQ_B_LU
+#undef AX_EQ_B_SVD
+#undef AX_EQ_B_BK
+/* LAPACK routine names; GESVD, GESDD and POTF2 are defined again further down */
+#undef GEQRF
+#undef ORGQR
+#undef TRTRS
+#undef POTF2
+#undef POTRF
+#undef POTRS
+#undef GETRF
+#undef GETRS
+#undef GESVD
+#undef GESDD
+#undef SYTRF
+#undef SYTRS
+
+/******************************************************************************** 
+ * Miscellaneous functions for Levenberg-Marquardt nonlinear minimization.
+ ********************************************************************************/
+
+/////////////////////////////////////////////////////////////////////////////////
+// 
+//  Levenberg - Marquardt non-linear minimization algorithm
+//  Copyright (C) 2004-05  Manolis Lourakis (lourakis at ics forth gr)
+//  Institute of Computer Science, Foundation for Research & Technology - Hellas
+//  Heraklion, Crete, Greece.
+//
+//  This program is free software; you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation; either version 2 of the License, or
+//  (at your option) any later version.
+//
+//  This program is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//
+/////////////////////////////////////////////////////////////////////////////////
+
+/* precision-specific definitions: LM_ADD_PREFIX (not defined in this file) derives the actual function names */
+#define LEVMAR_TRANS_MAT_MAT_MULT LM_ADD_PREFIX(levmar_trans_mat_mat_mult)
+#define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar)
+#define LEVMAR_L2NRMXMY LM_ADD_PREFIX(levmar_L2nrmxmy)
+/* the pseudoinverse helper has internal linkage and is only forward-declared here */
+#define LEVMAR_PSEUDOINVERSE LM_ADD_PREFIX(levmar_pseudoinverse)
+static int LEVMAR_PSEUDOINVERSE(LM_REAL *A, LM_REAL *B, int m);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* BLAS matrix multiplication, LAPACK SVD & Cholesky routines; every argument is passed by address */
+#define GEMM LM_MK_BLAS_NAME(gemm)
+/* C := alpha*op( A )*op( B ) + beta*C */
+extern void GEMM(char *transa, char *transb, int *m, int *n, int *k,
+          LM_REAL *alpha, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, LM_REAL *beta, LM_REAL *c, int *ldc);
+
+#define GESVD LM_MK_LAPACK_NAME(gesvd)
+#define GESDD LM_MK_LAPACK_NAME(gesdd)
+extern int GESVD(char *jobu, char *jobvt, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu,
+                 LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *info);
+
+/* lapack 3.0 new SVD routine, faster than xgesvd() */
+extern int GESDD(char *jobz, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt,
+                 LM_REAL *work, int *lwork, int *iwork, int *info);
+
+/* Cholesky decomposition (unblocked variant) */
+#define POTF2 LM_MK_LAPACK_NAME(potf2)
+extern int POTF2(char *uplo, int *n, LM_REAL *a, int *lda, int *info);
+#ifdef __cplusplus
+}
+#endif
+
+/* Compute b = a^T a for the row-major nxm matrix a; the result b is mxm.
+ * The product is delegated to the BLAS GEMM routine; no hand-written
+ * blocking is performed in this function.
+ *
+ * BLAS works on column-major data, and a row-major nxm array read as
+ * column-major is a^T. Asking GEMM for op(A)*op(B) with "N", "T" on that
+ * view therefore evaluates a^T*a without ever transposing a explicitly;
+ * the result is symmetric, so it reads the same in row-major order.
+ */
+void LEVMAR_TRANS_MAT_MAT_MULT(LM_REAL *a, LM_REAL *b, int n, int m)
+{
+LM_REAL one=LM_CNST(1.0);  /* passed as GEMM's alpha */
+LM_REAL zero=LM_CNST(0.0); /* passed as GEMM's beta */
+char no_trans[]="N", trans[]="T";
+
+  GEMM(no_trans, trans, &m, &m, &n, &one, a, &m, a, &m, &zero, b, &m);
+}
+
+/* 
+ * Check the Jacobian of a n-valued nonlinear function in m variables
+ * evaluated at a point p, for consistency with the function itself.
+ *
+ * Based on fortran77 subroutine CHKDER by
+ * Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. More
+ * Argonne National Laboratory. MINPACK project. March 1980.
+ *
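+ * NOTE: the routine documented by this comment does not follow it in this file; the text is kept for reference.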
+ * func points to a function from R^m --> R^n: Given a p in R^m it yields hx in R^n
+ * jacf points to a function implementing the Jacobian of func, whose correctness
+ *     is to be tested. Given a p in R^m, jacf computes into the nxm matrix j the
+ *     Jacobian of func at p. Note that row i of j corresponds to the gradient of
+ *     the i-th component of func, evaluated at p.
+ * p is an input array of length m containing the point of evaluation.
+ * m is the number of variables
+ * n is the number of functions
+ * adata points to possible additional data and is passed uninterpreted
+ *     to func, jacf.
+ * err is an array of length n. On output, err contains measures
+ *     of correctness of the respective gradients. if there is
+ *     no severe loss of significance, then if err[i] is 1.0 the
+ *     i-th gradient is correct, while if err[i] is 0.0 the i-th
+ *     gradient is incorrect. For values of err between 0.0 and 1.0,
+ *     the categorization is less certain. In general, a value of
+ *     err[i] greater than 0.5 indicates that the i-th gradient is
+ *     probably correct, while a value of err[i] less than 0.5
+ *     indicates that the i-th gradient is probably incorrect.
+ *
+ *
+ * The function does not perform reliably if cancellation or
+ * rounding errors cause a severe loss of significance in the
+ * evaluation of a function. therefore, none of the components
+ * of p should be unusually small (in particular, zero) or any
+ * other value which may cause loss of significance.
+ */
+
+/*
+ * This function computes the pseudoinverse of a square matrix A
+ * into B using SVD. A and B can coincide (A is copied before B is written)
+ *
+ * Returns the number of singular values above eps*s[0] (the rank estimate);
+ * returns 0 on malloc/LAPACK failure or when no singular value passes the test
+ *
+ * A, B are mxm, indexed row-major (element (i,j) at [i*m+j])
+ *
+ */
+static int LEVMAR_PSEUDOINVERSE(LM_REAL *A, LM_REAL *B, int m)
+{
+LM_REAL *buf=NULL;
+int buf_sz=0;
+static LM_REAL eps=LM_CNST(-1.0); /* machine epsilon, computed on the first successful call; negative means "not yet computed" */
+
+register int i, j;
+LM_REAL *a, *u, *s, *vt, *work;
+int a_sz, u_sz, s_sz, vt_sz, tot_sz;
+LM_REAL thresh, one_over_denom;
+int info, rank, worksz, *iwork, iworksz;
+   
+  /* calculate required memory size (in elements; converted to bytes in tot_sz) */
+  worksz=5*m; // min worksize for GESVD
+  //worksz=m*(7*m+4); // min worksize for GESDD
+  iworksz=8*m;
+  a_sz=m*m;
+  u_sz=m*m; s_sz=m; vt_sz=m*m;
+
+  tot_sz=(a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL) + iworksz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
+
+    buf_sz=tot_sz;
+    buf=(LM_REAL *)malloc(buf_sz);
+    if(!buf){
+      fprintf(stderr, RCAT("memory allocation in ", LEVMAR_PSEUDOINVERSE) "() failed!\n");
+      return 0; /* error */
+    }
+
+  a=buf; /* carve the single allocation into consecutive sub-arrays */
+  u=a+a_sz;
+  s=u+u_sz;
+  vt=s+s_sz;
+  work=vt+vt_sz;
+  iwork=(int *)(work+worksz); /* only referenced by the commented-out GESDD call below */
+
+  /* store A (column major!) into a */
+  for(i=0; i<m; i++)
+    for(j=0; j<m; j++)
+      a[i+j*m]=A[i*m+j];
+
+  /* SVD decomposition of A */
+  GESVD("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, &info);
+  //GESDD("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info);
+
+  /* error treatment: any nonzero info frees buf and reports failure by returning 0 */
+  if(info!=0){
+    if(info<0){
+      fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", LEVMAR_PSEUDOINVERSE) "()\n", -info);
+    }
+    else{
+      fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", LEVMAR_PSEUDOINVERSE) "() [info=%d]\n", info);
+    }
+    free(buf);
+    return 0;
+  }
+
+  if(eps<0.0){
+    LM_REAL aux;
+
+    /* compute machine epsilon: halve eps until 1+eps no longer exceeds 1, then undo the last halving */
+    for(eps=LM_CNST(1.0); aux=eps+LM_CNST(1.0), aux-LM_CNST(1.0)>0.0; eps*=LM_CNST(0.5))
+                                          ;
+    eps*=LM_CNST(2.0);
+  }
+
+  /* compute the pseudoinverse in B */
+	for(i=0; i<a_sz; i++) B[i]=0.0; /* initialize to zero */
+  for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){ /* stops at the first singular value <= thresh */
+    one_over_denom=LM_CNST(1.0)/s[rank];
+
+    for(j=0; j<m; j++)
+      for(i=0; i<m; i++)
+        B[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom; /* accumulate this singular triplet's contribution to B(i,j) */
+  }
+
+  free(buf);
+
+	return rank;
+}
+
+/*
+ * Covariance matrix of a least squares fit.
+ *
+ * JtJ   - approximate Hessian at the solution, i.e. J^T*J with J the Jacobian
+ *         evaluated there; mxm
+ * C     - output covariance, mxm; may be the same array as JtJ
+ * sumsq - sum of squared residuals at the solution
+ * m     - number of parameters (variables)
+ * n     - number of observations
+ *
+ * C is set to sumsq/(n-r)*(JtJ)^+, where ^+ is the pseudoinverse and r the
+ * rank it reports for JtJ; for a full-rank JtJ this is sumsq/(n-m)*(JtJ)^-1.
+ * The diagonal of C holds the variance estimates of the fitted regression
+ * coefficients; see the documentation of routine E04YCF from the NAG
+ * fortran library.
+ *
+ * Returns r on success and 0 if the pseudoinverse could not be computed.
+ *
+ */
+int LEVMAR_COVAR(LM_REAL *JtJ, LM_REAL *C, LM_REAL sumsq, int m, int n)
+{
+const int nelem=m*m;      /* number of entries in an mxm matrix */
+int k, rank;
+LM_REAL scale;
+
+   rank=LEVMAR_PSEUDOINVERSE(JtJ, C, m);
+   if(rank==0) return 0;  /* pseudoinverse reported failure */
+
+   scale=sumsq/(LM_REAL)(n-rank);
+   for(k=0; k<nelem; k++)
+     C[k]=C[k]*scale;
+
+   return rank;
+}
+
+/* Compute e=x-y for two n-vectors x and y and return the squared L2 norm of e.
+ * e can coincide with either x or y; x can be NULL, in which case it is assumed
+ * to be equal to the zero vector (so e=-y).
+ * Uses loop unrolling and blocking to reduce bookkeeping overhead & pipeline
+ * stalls and increase instruction-level parallelism; see http://www.abarnett.demon.co.uk/tutorial.html
+ * The result is accumulated in four partial sums that are added at the end.
+ */
+LM_REAL LEVMAR_L2NRMXMY(LM_REAL *e, LM_REAL *x, LM_REAL *y, int n)
+{
+const int blocksize=8, bpwr=3; /* 8=2^3 */
+register int i;
+int j1, j2, j3, j4, j5, j6, j7;
+int blockn;
+register LM_REAL sum0=0.0, sum1=0.0, sum2=0.0, sum3=0.0;
+
+  /* n may not be divisible by blocksize,
+   * go as near as we can first, then tidy up.
+   */
+  blockn = (n>>bpwr)<<bpwr; /* (n / blocksize) * blocksize; */
+
+  /* unroll the loop in blocks of `blocksize'; looping downwards gains some more speed */
+  if(x){
+    for(i=blockn-1; i>0; i-=blocksize){
+              e[i ]=x[i ]-y[i ]; sum0+=e[i ]*e[i ];
+      j1=i-1; e[j1]=x[j1]-y[j1]; sum1+=e[j1]*e[j1];
+      j2=i-2; e[j2]=x[j2]-y[j2]; sum2+=e[j2]*e[j2];
+      j3=i-3; e[j3]=x[j3]-y[j3]; sum3+=e[j3]*e[j3];
+      j4=i-4; e[j4]=x[j4]-y[j4]; sum0+=e[j4]*e[j4];
+      j5=i-5; e[j5]=x[j5]-y[j5]; sum1+=e[j5]*e[j5];
+      j6=i-6; e[j6]=x[j6]-y[j6]; sum2+=e[j6]*e[j6];
+      j7=i-7; e[j7]=x[j7]-y[j7]; sum3+=e[j7]*e[j7];
+    }
+
+   /*
+    * There may be some left to do (indices blockn .. n-1).
+    * This could be done as a simple for() loop,
+    * but a switch is faster (and more interesting)
+    */
+
+    i=blockn;
+    if(i<n){ /* between 1 and 7 elements remain */
+      /* Jump into the case at the place that will allow
+       * us to finish off the appropriate number of items.
+       */
+
+      switch(n - i){ /* every case deliberately falls through to the next one */
+        case 7 : e[i]=x[i]-y[i]; sum0+=e[i]*e[i]; ++i;
+        case 6 : e[i]=x[i]-y[i]; sum1+=e[i]*e[i]; ++i;
+        case 5 : e[i]=x[i]-y[i]; sum2+=e[i]*e[i]; ++i;
+        case 4 : e[i]=x[i]-y[i]; sum3+=e[i]*e[i]; ++i;
+        case 3 : e[i]=x[i]-y[i]; sum0+=e[i]*e[i]; ++i;
+        case 2 : e[i]=x[i]-y[i]; sum1+=e[i]*e[i]; ++i;
+        case 1 : e[i]=x[i]-y[i]; sum2+=e[i]*e[i]; //++i;
+      }
+    }
+  }
+  else{ /* x==0 */
+    for(i=blockn-1; i>0; i-=blocksize){
+              e[i ]=-y[i ]; sum0+=e[i ]*e[i ];
+      j1=i-1; e[j1]=-y[j1]; sum1+=e[j1]*e[j1];
+      j2=i-2; e[j2]=-y[j2]; sum2+=e[j2]*e[j2];
+      j3=i-3; e[j3]=-y[j3]; sum3+=e[j3]*e[j3];
+      j4=i-4; e[j4]=-y[j4]; sum0+=e[j4]*e[j4];
+      j5=i-5; e[j5]=-y[j5]; sum1+=e[j5]*e[j5];
+      j6=i-6; e[j6]=-y[j6]; sum2+=e[j6]*e[j6];
+      j7=i-7; e[j7]=-y[j7]; sum3+=e[j7]*e[j7];
+    }
+
+   /*
+    * There may be some left to do (indices blockn .. n-1).
+    * This could be done as a simple for() loop,
+    * but a switch is faster (and more interesting)
+    */
+
+    i=blockn;
+    if(i<n){ /* between 1 and 7 elements remain */
+      /* Jump into the case at the place that will allow
+       * us to finish off the appropriate number of items.
+       */
+
+      switch(n - i){ /* every case deliberately falls through to the next one */
+        case 7 : e[i]=-y[i]; sum0+=e[i]*e[i]; ++i;
+        case 6 : e[i]=-y[i]; sum1+=e[i]*e[i]; ++i;
+        case 5 : e[i]=-y[i]; sum2+=e[i]*e[i]; ++i;
+        case 4 : e[i]=-y[i]; sum3+=e[i]*e[i]; ++i;
+        case 3 : e[i]=-y[i]; sum0+=e[i]*e[i]; ++i;
+        case 2 : e[i]=-y[i]; sum1+=e[i]*e[i]; ++i;
+        case 1 : e[i]=-y[i]; sum2+=e[i]*e[i]; //++i;
+      }
+    }
+  }
+
+  return sum0+sum1+sum2+sum3;
+}
+
+/* undefine everything so the macro names can be defined afresh by the code that follows. THIS MUST REMAIN AT THE END OF THE FILE */
+#undef POTF2
+#undef GESVD
+#undef GESDD
+#undef GEMM
+#undef LEVMAR_PSEUDOINVERSE
+#undef LEVMAR_LUINVERSE /* no matching #define is visible in this section; #undef of an undefined name is permitted */
+#undef LEVMAR_COVAR
+#undef LEVMAR_TRANS_MAT_MAT_MULT
+#undef LEVMAR_L2NRMXMY
+
+/////////////////////////////////////////////////////////////////////////////////
+// 
+//  Levenberg - Marquardt non-linear minimization algorithm
+//  Copyright (C) 2004  Manolis Lourakis (lourakis at ics forth gr)
+//  Institute of Computer Science, Foundation for Research & Technology - Hellas
+//  Heraklion, Crete, Greece.
+//
+//  This program is free software; you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation; either version 2 of the License, or
+//  (at your option) any later version.
+//
+//  This program is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//
+/////////////////////////////////////////////////////////////////////////////////
+
+/******************************************************************************** 
+ * Levenberg-Marquardt nonlinear minimization. The same core code is used with
+ * appropriate #defines to derive single and double precision versions, see
+ * also lm_core.c (the upstream file name; here the core code follows inline)
+ ********************************************************************************/
+
+#define EPSILON       1E-12 /* used squared in the "almost singular" step test of LEVMAR_DER */
+#define ONE_THIRD     0.3333333334 /* 1.0/3.0; lower bound of the factor applied to mu after an accepted step */
+
+/* double precision definitions */
+
+/////////////////////////////////////////////////////////////////////////////////
+// 
+//  Levenberg - Marquardt non-linear minimization algorithm
+//  Copyright (C) 2004  Manolis Lourakis (lourakis at ics forth gr)
+//  Institute of Computer Science, Foundation for Research & Technology - Hellas
+//  Heraklion, Crete, Greece.
+//
+//  This program is free software; you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation; either version 2 of the License, or
+//  (at your option) any later version.
+//
+//  This program is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//
+/////////////////////////////////////////////////////////////////////////////////
+
+/* precision-specific definitions: each generic name is mapped through LM_ADD_PREFIX */
+#define LEVMAR_DER LM_ADD_PREFIX(levmar_der)
+#define LEVMAR_TRANS_MAT_MAT_MULT LM_ADD_PREFIX(levmar_trans_mat_mat_mult)
+#define LEVMAR_L2NRMXMY LM_ADD_PREFIX(levmar_L2nrmxmy)
+#define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar)
+/* linear system solvers; of these, LEVMAR_DER below only calls AX_EQ_B_BK */
+#define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU)
+#define AX_EQ_B_CHOL LM_ADD_PREFIX(Ax_eq_b_Chol)
+#define AX_EQ_B_QR LM_ADD_PREFIX(Ax_eq_b_QR)
+#define AX_EQ_B_QRLS LM_ADD_PREFIX(Ax_eq_b_QRLS)
+#define AX_EQ_B_SVD LM_ADD_PREFIX(Ax_eq_b_SVD)
+#define AX_EQ_B_BK LM_ADD_PREFIX(Ax_eq_b_BK)
+
+/*
+ * This function seeks the parameter vector p that best describes the measurements vector x.
+ * More precisely, given a vector function  func : R^m --> R^n with n>=m,
+ * it finds p s.t. func(p) ~= x, i.e. the squared second order (i.e. L2) norm of
+ * e=x-func(p) is minimized.
+ *
+ * This function requires an analytic Jacobian. In case the latter is unavailable,
+ * use LEVMAR_DIF() instead. The current estimate p[0..m-1] is printed to stdout at every iteration.
+ *
+ * Returns the number of iterations (>=0) if successful, LM_ERROR if failed
+ *
+ * For more details, see K. Madsen, H.B. Nielsen and O. Tingleff's lecture notes on 
+ * non-linear least squares at http://www.imm.dtu.dk/pubdb/views/edoc_download.php/3215/pdf/imm3215.pdf
+ */
+
+int LEVMAR_DER(
+  void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in  R^n */
+  void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata),  /* function to evaluate the Jacobian \part x / \part p */
+  LM_REAL *p,         /* I/O: initial parameter estimates. On output has the estimated solution */
+  LM_REAL *x,         /* I: measurement vector. NULL implies a zero vector */
+  int m,              /* I: parameter vector dimension (i.e. #unknowns) */
+  int n,              /* I: measurement vector dimension */
+  int itmax,          /* I: maximum number of iterations */
+  LM_REAL opts[4],    /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3]. Respectively the scale factor for initial \mu,
+                       * stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2. Set to NULL for defaults to be used
+                       */
+  LM_REAL info[LM_INFO_SZ],
+                     /* O: information regarding the minimization. Set to NULL if don't care
+                      * info[0]= ||e||_2 at initial p.
+                      * info[1-4]=[ ||e||_2, ||J^T e||_inf,  ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
+                      * info[5]= # iterations,
+                      * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
+                      *                                 2 - stopped by small Dp
+                      *                                 3 - stopped by itmax
+                      *                                 4 - singular matrix. Restart from current p with increased mu 
+                      *                                 5 - no further error reduction is possible. Restart with increased mu
+                      *                                 6 - stopped by small ||e||_2
+                      *                                 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
+                      * info[7]= # function evaluations
+                      * info[8]= # Jacobian evaluations
+                      * info[9]= # linear systems solved, i.e. # attempts for reducing error
+                      */
+  LM_REAL *work,     /* working memory at least LM_DER_WORKSZ() reals large, allocated if NULL */
+  LM_REAL *covar,    /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
+  void *adata)       /* pointer to possibly additional data, passed uninterpreted to func & jacf.
+                      * Set to NULL if not needed
+                      */
+{
+register int i, j, k, l;
+int worksz, freework=0, issolved;
+/* temp work arrays */
+LM_REAL *e,          /* nx1 */
+       *hx,         /* \hat{x}_i, nx1 */
+       *jacTe,      /* J^T e_i mx1 */
+       *jac,        /* nxm */
+       *jacTjac,    /* mxm */
+       *Dp,         /* mx1 */
+   *diag_jacTjac,   /* diagonal of J^T J, mx1 */
+       *pDp;        /* p + Dp, mx1 */
+
+register LM_REAL mu,  /* damping constant */
+                tmp; /* mainly used in matrix & vector multiplications */
+LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */
+LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL;
+LM_REAL tau, eps1, eps2, eps2_sq, eps3;
+LM_REAL init_p_eL2;
+int nu=2, nu2, stop=0, nfev, njev=0, nlss=0;
+const int nm=n*m;
+int (*linsolver)(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)=NULL;
+
+  mu=jacTe_inf=0.0; /* -Wall */
+
+  if(n<m){
+    fprintf(stderr, LCAT(LEVMAR_DER, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m);
+    return LM_ERROR;
+  }
+
+  if(!jacf){
+    fprintf(stderr, RCAT("No function specified for computing the Jacobian in ", LEVMAR_DER) "()\n");
+    return LM_ERROR;
+  }
+
+  if(opts){
+    tau=opts[0];
+    eps1=opts[1];
+    eps2=opts[2];
+    eps2_sq=opts[2]*opts[2];
+    eps3=opts[3];
+  }
+  else{ // use default values
+    tau=LM_CNST(LM_INIT_MU);
+    eps1=LM_CNST(LM_STOP_THRESH);
+    eps2=LM_CNST(LM_STOP_THRESH);
+    eps2_sq=LM_CNST(LM_STOP_THRESH)*LM_CNST(LM_STOP_THRESH);
+    eps3=LM_CNST(LM_STOP_THRESH);
+  }
+
+  if(!work){
+    worksz=LM_DER_WORKSZ(m, n); //2*n+4*m + n*m + m*m;
+    work=(LM_REAL *)malloc(worksz*sizeof(LM_REAL)); /* allocate a big chunk in one step */
+    if(!work){
+      fprintf(stderr, LCAT(LEVMAR_DER, "(): memory allocation request failed\n"));
+      return LM_ERROR;
+    }
+    freework=1;
+  }
+
+  /* set up work arrays: consecutive slices of work, in the order counted by LM_DER_WORKSZ */
+  e=work;
+  hx=e + n;
+  jacTe=hx + n;
+  jac=jacTe + m;
+  jacTjac=jac + nm;
+  Dp=jacTjac + m*m;
+  diag_jacTjac=Dp + m;
+  pDp=diag_jacTjac + m;
+
+  /* compute e=x - f(p) and its L2 norm */
+  (*func)(p, hx, m, n, adata); nfev=1;
+  /* ### e=x-hx, p_eL2=||e|| */
+
+  p_eL2=LEVMAR_L2NRMXMY(e, x, hx, n);  
+
+  init_p_eL2=p_eL2;
+  if(!finite(p_eL2)) stop=7;
+
+  for(k=0; k<itmax && !stop; ++k){
+    /* Note that p and e have been updated at a previous iteration */
+
+    for (i=0; i<m; i++) /* trace the current estimate; p holds exactly m entries, so never index past m */
+      printf("%8.4f", p[i]);
+    printf("\n");
+
+    if(p_eL2<=eps3){ /* error is small */
+      stop=6;
+      break;
+    }
+
+    /* Compute the Jacobian J at p,  J^T J,  J^T e,  ||J^T e||_inf and ||p||^2.
+     * Since J^T J is symmetric, its computation can be sped up by computing
+     * only its upper triangular part and copying it to the lower part
+     */
+
+    (*jacf)(p, jac, m, n, adata); ++njev;
+
+    /* J^T J, J^T e */
+    if(nm<__BLOCKSZ__SQ){ // this is a small problem
+      /* J^T*J_ij = \sum_l J^T_il * J_lj = \sum_l J_li * J_lj.
+       * Thus, the product J^T J can be computed using an outer loop for
+       * l that adds J_li*J_lj to each element ij of the result. Note that
+       * with this scheme, the accesses to J and JtJ are always along rows,
+       * therefore induces less cache misses compared to the straightforward
+       * algorithm for computing the product (i.e., l loop is innermost one).
+       * A similar scheme applies to the computation of J^T e.
+       * However, for large minimization problems (i.e., involving a large number
+       * of unknowns and measurements) for which J/J^T J rows are too large to
+       * fit in the L1 cache, even this scheme incures many cache misses. In
+       * such cases, a cache-efficient blocking scheme is preferable.
+       *
+       * Thanks to John Nitao of Lawrence Livermore Lab for pointing out this
+       * performance problem.
+       *
+       * Note that the non-blocking algorithm is faster on small
+       * problems since in this case it avoids the overheads of blocking. 
+       */
+
+      /* looping downwards saves a few computations */
+      register int l;
+      register LM_REAL alpha, *jaclm, *jacTjacim;
+
+      for(i=m*m; i-->0; )
+        jacTjac[i]=0.0;
+      for(i=m; i-->0; )
+        jacTe[i]=0.0;
+
+      for(l=n; l-->0; ){
+        jaclm=jac+l*m;
+        for(i=m; i-->0; ){
+          jacTjacim=jacTjac+i*m;
+          alpha=jaclm[i]; //jac[l*m+i];
+          for(j=i+1; j-->0; ) /* j<=i computes lower triangular part only */
+            jacTjacim[j]+=jaclm[j]*alpha; //jacTjac[i*m+j]+=jac[l*m+j]*alpha
+
+          /* J^T e */
+          jacTe[i]+=alpha*e[l];
+        }
+      }
+
+      for(i=m; i-->0; ) /* copy to upper part */
+        for(j=i+1; j<m; ++j)
+          jacTjac[i*m+j]=jacTjac[j*m+i];
+
+    }
+    else{ // this is a large problem
+      /* J^T J is computed by LEVMAR_TRANS_MAT_MAT_MULT
+       */
+      LEVMAR_TRANS_MAT_MAT_MULT(jac, jacTjac, n, m);
+
+      /* cache efficient computation of J^T e */
+      for(i=0; i<m; ++i)
+        jacTe[i]=0.0;
+
+      for(i=0; i<n; ++i){
+        register LM_REAL *jacrow;
+
+        for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l)
+          jacTe[l]+=jacrow[l]*tmp;
+      }
+    }
+
+    /* Compute ||J^T e||_inf and ||p||^2 */
+    for(i=0, p_L2=jacTe_inf=0.0; i<m; ++i){
+      if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp;
+
+      diag_jacTjac[i]=jacTjac[i*m+i]; /* save diagonal entries so that augmentation can be later canceled */
+      p_L2+=p[i]*p[i];
+    }
+    //p_L2=sqrt(p_L2);
+
+    /* check for convergence */
+    if((jacTe_inf <= eps1)){
+      Dp_L2=0.0; /* no increment for p in this case */
+      stop=1;
+      break;
+    }
+
+   /* compute initial damping factor */
+    if(k==0){
+      for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
+        if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */
+      mu=tau*tmp;
+    }
+
+    /* determine increment using adaptive damping */
+    while(1){
+      /* augment normal equations */
+      for(i=0; i<m; ++i)
+        jacTjac[i*m+i]+=mu;
+
+      /* solve augmented equations */
+      /* 7 alternatives are available: LU, Cholesky + Cholesky with PLASMA, LDLt, 2 variants of QR decomposition and SVD.
+       * For matrices with dimensions of at least a few hundreds, the PLASMA implementation of Cholesky is the fastest.
+       * From the serial solvers, Cholesky is the fastest but might occasionally be inapplicable due to numerical round-off;
+       * QR is slower but more robust; SVD is the slowest but most robust; LU is quite robust but
+       * slower than LDLt; LDLt offers a good tradeoff between robustness and speed
+       */
+
+      issolved=AX_EQ_B_BK(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_BK;
+
+      if(issolved){
+        /* compute p's new estimate and ||Dp||^2 */
+        for(i=0, Dp_L2=0.0; i<m; ++i){
+          pDp[i]=p[i] + (tmp=Dp[i]);
+          Dp_L2+=tmp*tmp;
+        }
+        //Dp_L2=sqrt(Dp_L2);
+
+        if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
+        //if(Dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
+          stop=2;
+          break;
+        }
+
+       if(Dp_L2>=(p_L2+eps2)/(LM_CNST(EPSILON)*LM_CNST(EPSILON))){ /* almost singular */
+       //if(Dp_L2>=(p_L2+eps2)/LM_CNST(EPSILON)){ /* almost singular */
+         stop=4;
+         break;
+       }
+
+        (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + Dp */
+        /* compute ||e(pDp)||_2 */
+        /* ### hx=x-hx, pDp_eL2=||hx|| */
+
+        pDp_eL2=LEVMAR_L2NRMXMY(hx, x, hx, n);
+
+        if(!finite(pDp_eL2)){ /* sum of squares is not finite, most probably due to a user error.
+                                  * This check makes sure that the inner loop does not run indefinitely.
+                                  * Thanks to Steve Danauskas for reporting such cases
+                                  */
+          stop=7;
+          break;
+        }
+
+        for(i=0, dL=0.0; i<m; ++i)
+          dL+=Dp[i]*(mu*Dp[i]+jacTe[i]);
+
+        dF=p_eL2-pDp_eL2;
+
+        if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
+          tmp=(LM_CNST(2.0)*dF/dL-LM_CNST(1.0));
+          tmp=LM_CNST(1.0)-tmp*tmp*tmp;
+          mu=mu*( (tmp>=LM_CNST(ONE_THIRD))? tmp : LM_CNST(ONE_THIRD) );
+          nu=2;
+
+          for(i=0 ; i<m; ++i) /* update p's estimate */
+            p[i]=pDp[i];
+
+          for(i=0; i<n; ++i) /* update e and ||e||_2 */
+            e[i]=hx[i];
+          p_eL2=pDp_eL2;
+          break;
+        }
+      }
+
+      /* if this point is reached, either the linear system could not be solved or
+       * the error did not reduce; in any case, the increment must be rejected
+       */
+
+      mu*=nu;
+      nu2=nu<<1; // 2*nu;
+      if(nu2<=nu){ /* nu has wrapped around (overflown). Thanks to Frank Jordan for spotting this case */
+        stop=5;
+        break;
+      }
+      nu=nu2;
+
+      for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
+        jacTjac[i*m+i]=diag_jacTjac[i];
+    } /* inner loop */
+  }
+
+  if(k>=itmax) stop=3;
+  /* NOTE(review): if no Jacobian was ever evaluated (itmax<=0, or stop 6/7 at k==0), jacTjac and diag_jacTjac used below were never written */
+  for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
+    jacTjac[i*m+i]=diag_jacTjac[i];
+
+  if(info){
+    info[0]=init_p_eL2;
+    info[1]=p_eL2;
+    info[2]=jacTe_inf;
+    info[3]=Dp_L2;
+    for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
+      if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i];
+    info[4]=mu/tmp;
+    info[5]=(LM_REAL)k;
+    info[6]=(LM_REAL)stop;
+    info[7]=(LM_REAL)nfev;
+    info[8]=(LM_REAL)njev;
+    info[9]=(LM_REAL)nlss;
+  }
+
+  /* covariance matrix */
+  if(covar){
+    LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n);
+  }
+
+  if(freework) free(work);
+
+#ifdef LINSOLVERS_RETAIN_MEMORY
+  if(linsolver) (*linsolver)(NULL, NULL, NULL, 0); /* all-NULL call made once at the end, only when LINSOLVERS_RETAIN_MEMORY is defined */
+#endif
+
+  return (stop!=4 && stop!=7)?  k : LM_ERROR;
+}
+
+/* undefine everything, including the precision macros (LM_REAL, LM_PREFIX, ...). THIS MUST REMAIN AT THE END OF THE FILE */
+#undef LEVMAR_DER
+#undef LEVMAR_COVAR
+#undef LEVMAR_TRANS_MAT_MAT_MULT
+#undef LEVMAR_L2NRMXMY
+#undef AX_EQ_B_LU
+#undef AX_EQ_B_CHOL
+#undef AX_EQ_B_QR
+#undef AX_EQ_B_QRLS
+#undef AX_EQ_B_SVD
+#undef AX_EQ_B_BK
+
+#undef LM_REAL
+#undef LM_PREFIX
+#undef LM_REAL_MAX
+#undef LM_REAL_EPSILON
+#undef LM_REAL_MIN
+#undef LM_CNST
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_levmar.h gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_levmar.h
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_levmar.h	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_levmar.h	2015-02-10 12:21:00.105450460 +0100
@@ -0,0 +1,131 @@
+/* 
+////////////////////////////////////////////////////////////////////////////////////
+// 
+//  Prototypes and definitions for the Levenberg - Marquardt minimization algorithm
+//  Copyright (C) 2004  Manolis Lourakis (lourakis at ics forth gr)
+//  Institute of Computer Science, Foundation for Research & Technology - Hellas
+//  Heraklion, Crete, Greece.
+//
+//  This program is free software; you can redistribute it and/or modify
+//  it under the terms of the GNU General Public License as published by
+//  the Free Software Foundation; either version 2 of the License, or
+//  (at your option) any later version.
+//
+//  This program is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//  GNU General Public License for more details.
+//
+////////////////////////////////////////////////////////////////////////////////////
+*/
+
+#ifndef _LEVMAR_H_
+#define _LEVMAR_H_
+
+/************************************* Start of configuration options *************************************/
+/* Note that when compiling with CMake, this configuration section is automatically generated
+ * based on the user's input, see levmar.h.in
+ */
+
+/* to avoid the overhead of repeated mallocs(), routines in Axb.c can be instructed to
+ * retain working memory between calls. Such a choice, however, renders these routines
+ * non-reentrant and is not safe in a shared memory multiprocessing environment.
+ * Below, an attempt is made to issue a warning if this option is turned on and OpenMP
+ * is being used (note that this will work only if omp.h is included before levmar.h)
+ */
+#define LINSOLVERS_RETAIN_MEMORY
+#if (defined(_OPENMP))
+# ifdef LINSOLVERS_RETAIN_MEMORY
+#  warning LINSOLVERS_RETAIN_MEMORY is not safe in a multithreaded environment and should be turned off!
+# endif /* LINSOLVERS_RETAIN_MEMORY */
+#endif /* _OPENMP */
+
+/****************** End of configuration options, no changes necessary beyond this point ******************/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* work arrays size for ?levmar_der and ?levmar_dif functions.
+ * should be multiplied by sizeof(double) or sizeof(float) to be converted to bytes
+ */
+#define LM_DER_WORKSZ(npar, nmeas) (2*(nmeas) + 4*(npar) + (nmeas)*(npar) + (npar)*(npar))
+#define LM_DIF_WORKSZ(npar, nmeas) (4*(nmeas) + 4*(npar) + (nmeas)*(npar) + (npar)*(npar))
+
+#define LM_OPTS_SZ    	 5 /* max(4, 5) */
+#define LM_INFO_SZ    	 10
+#define LM_ERROR         -1
+#define LM_INIT_MU    	 1E-03
+#define LM_STOP_THRESH	 1E-17
+#define LM_DIFF_DELTA    1E-06
+#define LM_VERSION       "2.6 (November 2011)"
+
+/* double precision LM; only the variant taking an analytic Jacobian is declared here */
+/* unconstrained minimization */
+extern int dlevmar_der(
+      void (*func)(double *p, double *hx, int m, int n, void *adata),
+      void (*jacf)(double *p, double *j, int m, int n, void *adata),
+      double *p, double *x, int m, int n, int itmax, double *opts,
+      double *info, double *work, double *covar, void *adata);
+
+/* linear system solvers */
+extern int dAx_eq_b_QR(double *A, double *B, double *x, int m);
+extern int dAx_eq_b_QRLS(double *A, double *B, double *x, int m, int n);
+extern int dAx_eq_b_Chol(double *A, double *B, double *x, int m);
+extern int dAx_eq_b_LU(double *A, double *B, double *x, int m);
+extern int dAx_eq_b_SVD(double *A, double *B, double *x, int m);
+extern int dAx_eq_b_BK(double *A, double *B, double *x, int m);
+
+#ifdef __cplusplus
+}
+#endif
+
+/* common suffix for LAPACK subroutines. Define empty in case of no suffix. */
+#define LM_LAPACK_SUFFIX _
+//#define LM_LAPACK_SUFFIX  // define empty
+
+/* common suffix for BLAS subroutines */
+#define LM_BLAS_SUFFIX _ // use this in case of no BLAS prefix
+
+#define LCAT_(a, b)    #a b
+#define LCAT(a, b)    LCAT_(a, b) // force substitution; result is the stringified a followed by b
+#define RCAT_(a, b)    a #b
+#define RCAT(a, b)    RCAT_(a, b) // force substitution; result is a followed by the stringified b
+/* routine name = LM_PREFIX, then s, then the suffix defined above */
+#define LM_MK_LAPACK_NAME(s) LM_ADD_PREFIX(LM_CAT_(s, LM_LAPACK_SUFFIX))
+#define LM_MK_BLAS_NAME(s)   LM_ADD_PREFIX(LM_CAT_(s, LM_BLAS_SUFFIX))
+
+#define __BLOCKSZ__       32 /* block size for cache-friendly matrix-matrix multiply. It should be
+                              * such that __BLOCKSZ__^2*sizeof(LM_REAL) is smaller than the CPU (L1)
+                              * data cache size. Notice that a value of 32 when LM_REAL=double assumes
+                              * an 8Kb L1 data cache (32*32*8=8K). This is a conservative choice since
+                              * newer Pentium 4s have a L1 data cache of size 16K, capable of holding
+                              * up to 45x45 double blocks.
+                              */
+#define __BLOCKSZ__SQ    ((__BLOCKSZ__)*(__BLOCKSZ__)) /* outer parentheses keep the product intact inside larger expressions */
+
+/* add a prefix in front of a token */
+#define LM_CAT__(a, b) a ## b
+#define LM_CAT_(a, b) LM_CAT__(a, b) // force substitution
+#define LM_ADD_PREFIX(s) LM_CAT_(LM_PREFIX, s)
+/* absolute value; note that the argument x is evaluated more than once */
+#define FABS(x) (((x)>=0.0)? (x) : -(x))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* b = a^T a for an nxm matrix a (b is mxm) */
+extern void dlevmar_trans_mat_mat_mult(double *a, double *b, int n, int m);
+
+/* e=x-y; the return value is the sum of squares of e, i.e. ||e||^2 */
+extern double dlevmar_L2nrmxmy(double *e, double *x, double *y, int n);
+
+/* covariance of LS fit; returns the rank of JtJ, or 0 on error */
+extern int dlevmar_covar(double *JtJ, double *C, double sumsq, int m, int n);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LEVMAR_H_ */
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_mulliken.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_mulliken.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_mulliken.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_mulliken.c	2014-09-08 01:47:57.000000000 +0200
@@ -0,0 +1,45 @@
+#include<stdio.h>
+// #include"charge_transfer.h"
+#include"qm_dftb.h"
+
+void mulliken(int nn, double *qmat, double *qmulli, double *qtot, int ndim,
+                double *occ, double **a, double **overl, int *ind,
+                int lmax[DFTB_MAXTYPES], int *izp)
+{
+  /* Mulliken population analysis:
+   *   qmulli[mu] = sum_nu overl[mu][nu] * sum_k occ[k] * a[mu][k] * a[nu][k]
+   *   qmat[at]   = sum of qmulli over the orbitals of atom at; shell l of the
+   *                atom (l < lmax[izp[at]]) holds the 2l+1 orbitals starting
+   *                at index ind[at] + l*l
+   *   *qtot      = sum of qmat[at] over all nn atoms
+   */
+  int at, mu, nu, k, shell, orb, first, last;
+  double dens, shellpop, atompop;
+
+  for (mu = 0; mu < ndim; mu++) {
+    qmulli[mu] = 0.0;
+    for (nu = 0; nu < ndim; nu++) {
+      dens = 0.0;
+      for (k = 0; k < ndim; k++)
+        dens += occ[k] * a[mu][k] * a[nu][k];
+      qmulli[mu] += overl[mu][nu] * dens;
+    }
+  }
+
+  for (at = 0; at < nn; at++) {
+    atompop = 0.0;
+    for (shell = 0; shell < lmax[izp[at]]; shell++) {
+      first = ind[at] + shell * shell;
+      last = first + 2 * shell;
+      shellpop = 0.0;
+      for (orb = first; orb <= last; orb++)
+        shellpop += qmulli[orb];
+      atompop += shellpop;
+    }
+    qmat[at] = atompop;
+  }
+
+  *qtot = 0.0;
+  for (at = 0; at < nn; at++)
+    *qtot += qmat[at];
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_neighborlist.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_neighborlist.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_neighborlist.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_neighborlist.c	2014-10-14 21:12:44.000000000 +0200
@@ -0,0 +1,61 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+#include"qm_dftb.h"
+
+/* Minimum-image difference dx = x1 - x2, adopted from src/gmxlib/pbc.c.
+ * Only the diagonal elements of box are used, each scaled by NM_TO_BOHR.
+ * NOTE(review): the wrapping loops can spin forever if a diagonal element
+ * is <= 0 - confirm that callers always pass a periodic box. */
+static inline void pbc_dx_dftb(matrix box, const dvec x1, const dvec x2, dvec dx)
+{
+    int d;
+    double edge, half;
+
+    for (d = 0; d < DIM; d++) {
+        edge  = (double) box[d][d] * NM_TO_BOHR;
+        half  = edge / 2.;
+        dx[d] = x1[d] - x2[d];
+        while (dx[d] > half)
+            dx[d] -= edge;
+        while (dx[d] < -half)
+            dx[d] += edge;
+    }
+}
+
+void do_neighborlist_for_dftb(dftb_t *dftb, matrix box)
+{
+  /* Naive neighbour search for PME: for every QM atom (dftb1.x), collect the
+   * indices of all MM atoms (dftb1.xe) whose periodic distance, as given by
+   * pbc_dx_dftb(), is below dftb->rlist_pme * NM_TO_BOHR. The indices go to
+   * neighbor_pme[qm][], their number to neighbors_pme[qm].
+   * The program exits as soon as one QM atom reaches MAX_PME_NEIGHBORS entries.
+   * NOTE(review): phase1 is copied by value here; the lists only reach the
+   * caller if neighbor_pme / neighbors_pme are pointer members - confirm in
+   * qm_dftb.h.
+   */
+  dftb_phase1_t dftb1 = dftb->phase1;
+  const int nqm = dftb1.nn;
+  const int nmm = dftb1.ne;
+  const double cutoff2 = SQR(dftb->rlist_pme * NM_TO_BOHR);
+  int qm, mm, found;
+  dvec bond;
+
+  for (qm = 0; qm < nqm; qm++) {
+    found = 0;
+    for (mm = 0; mm < nmm; mm++) {
+      pbc_dx_dftb(box, dftb1.x[qm], dftb1.xe[mm], bond);
+      if (!(dnorm2(bond) < cutoff2))
+        continue; /* outside the cut-off */
+
+      dftb1.neighbor_pme[qm][found] = mm;
+      found++;
+      if (found == MAX_PME_NEIGHBORS) {
+        fprintf(stderr, "\nToo many PME neighbors found for QM atom %d\nExiting!\n\n", qm+1);
+        exit(-1);
+      }
+    }
+    dftb1.neighbors_pme[qm] = found;
+  }
+}
+
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_output.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_output.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_output.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_output.c	2012-09-13 14:25:22.000000000 +0200
@@ -0,0 +1,66 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+// #include"charge_transfer.h"
+#include"qm_dftb.h"
+
+void outeigenvectors(double **a, double *ev, int *ind, int nn, dftb_phase1_t dftb1)
+{
+  /* Writes the eigenvector matrix a to two files in the working directory:
+   * "ao2fo" holds every coefficient, column by column (a[0][i], a[1][i], ...);
+   * "EVC.DAT" holds, per eigenvalue ev[i], the coefficients a[l][i] of each
+   * atom's orbitals l = ind[j] .. ind[j+1]-1. EVC.DAT used to be written to
+   * the closed ao2fo handle; labels past Pz and fopen() failures are handled. */
+  static const char *orbital[9] = {"S  ", "Px ", "Py ", "Pz ", "D1 ", "D2 ", "D3 ", "D4 ", "D5 "};
+  const int neig = ind[nn];
+  FILE *f;
+  int i, j, k, l;
+
+  if ((f = fopen("ao2fo", "w")) != NULL) {
+    for (i=0; i<neig; i++)
+      for (j=0; j<neig; j++)
+        fprintf(f, "%16.12f\n", a[j][i]);
+    fclose(f);
+  } else
+    perror("ao2fo");
+
+  if ((f = fopen("EVC.DAT", "w")) == NULL) { perror("EVC.DAT"); return; }
+  fprintf(f, "THE ONE-ELECTRON EIGENVALUES AND EIGENVECTORS\n\n");
+  for (i=0; i<neig; i++) {
+    fprintf(f, "%3dth eigenvalue = %10.6f H = %10.6f eV\n", i+1, ev[i], ev[i]*27.2114); /* same factor as outspec() */
+    fprintf(f, "Atom No.   Atom Type\n");
+    for (j=0; j<nn; j++) {
+      fprintf(f, "%5d  %5d\n", j+1, dftb1.izp[j]);
+      for (l=ind[j], k=0; l<ind[j+1]; l++, k++)
+        fprintf(f, "%s%8.5f\n", k < 9 ? orbital[k] : "?? ", a[l][i]);
+    }
+    fprintf(f, "\n");
+  }
+  fclose(f);
+}
+
+void outspec(int nn, int ndim, int *ind, double *ev, double *occ,
+               double efermi, double *qmat, double *qmulli, dftb_t *dftb, dftb_phase1_t dftb1)
+{
+  /* Writes "SPE.DAT" (efermi, then every ev[i] in Hartree and eV with occ[i])
+   * and "CHR.DAT" (per atom: number, qmat[i], its SQR(lmax) values of qmulli
+   * from ind[i] on). A file that cannot be opened is reported and skipped. */
+  FILE *f;
+  int i, j, last;
+  if ((f = fopen("SPE.DAT", "w")) != NULL) {
+    fprintf(f, "%20.12f%20.12f\n", efermi, efermi*27.2114);
+    for (i=0; i<ndim; i++)
+      fprintf(f, "%20.12f%20.12f%20.12f\n", ev[i], ev[i]*27.2114, occ[i]);
+    fclose(f);
+  } else
+    perror("SPE.DAT");
+  if ((f = fopen("CHR.DAT", "w")) == NULL) { perror("CHR.DAT"); return; }
+  for (i=0; i<nn; i++) {
+    last = ind[i] + SQR(dftb->lmax[dftb1.izp[i]]);
+    fprintf(f, "%4d%12.6f", i+1, qmat[i]);
+    for (j=ind[i]; j<last; j++)
+      fprintf(f, "%12.6f", qmulli[j]);
+    fprintf(f, "\n");
+  }
+  fclose(f);
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_repulsive.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_repulsive.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_repulsive.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_repulsive.c	2012-09-13 14:25:25.000000000 +0200
@@ -0,0 +1,65 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+#include"qm_dftb.h"
+
+double repulsive(dftb_t *dftb, dvec *x, dvec *grad)
+{
+  /* Sums the pair repulsion over all dftb->atoms atoms at positions x, adds the
+   * gradient to grad and returns the energy. For a pair at distance r:
+   *   r < 1e-2 or r > cutoff : skipped;
+   *   r < xr[0][0]           : exp(-efkt[0]*r + efkt[1]) + efkt[2];
+   *   else                   : polynomial in xh = r - xr[i][0] of the interval
+   *                            xr[i][0] <= r < xr[i][1] - cubic, 5th order for
+   *                            the last interval.
+   * If no interval matches (r at the upper end of the last one), the last one
+   * is used; index numint used to be read out of bounds in that case. */
+  int i, j, k, izpj, izpk, nseg;
+  double r, erep, epair, grdr, xh;
+  dvec bond, tmpgrad;
+
+  erep = 0.e0;
+
+  for (j=0; j<dftb->atoms; j++) {
+    izpj = dftb->phase1.izp[j];
+    for (k=j+1; k<dftb->atoms; k++) {
+      izpk = dftb->phase1.izp[k];
+      dvec_sub(x[k], x[j], bond);
+      r = dnorm(bond);
+      if (r < 1.e-2 || r > dftb->cutoff[izpj][izpk])
+        continue;
+      if (r < dftb->xr[izpj][izpk][0][0]) {
+        erep += exp(-dftb->efkt[izpj][izpk][0]*r + dftb->efkt[izpj][izpk][1]) + dftb->efkt[izpj][izpk][2];
+        grdr = -dftb->efkt[izpj][izpk][0] * exp(-dftb->efkt[izpj][izpk][0]*r + dftb->efkt[izpj][izpk][1]);
+      } else {
+        nseg = dftb->numint[izpj][izpk];
+        for (i=0; i<nseg; i++)
+          if (r >= dftb->xr[izpj][izpk][i][0] && r < dftb->xr[izpj][izpk][i][1])
+            break;
+        if (i >= nseg && nseg > 0)
+          i = nseg - 1; /* no match: stay inside the tables */
+        xh = r - dftb->xr[izpj][izpk][i][0];
+        epair = dftb->coeff[izpj][izpk][i][0] +
+                dftb->coeff[izpj][izpk][i][1] * xh +
+                dftb->coeff[izpj][izpk][i][2] * xh * xh +
+                dftb->coeff[izpj][izpk][i][3] * xh * xh * xh;
+        grdr = dftb->coeff[izpj][izpk][i][1] +
+               2 * dftb->coeff[izpj][izpk][i][2] * xh +
+               3 * dftb->coeff[izpj][izpk][i][3] * xh * xh;
+        if (i >= nseg - 1) { /* last interval: add the 4th and 5th order terms */
+          epair += dftb->coeff[izpj][izpk][i][4] * xh * xh * xh * xh;
+          epair += dftb->coeff[izpj][izpk][i][5] * xh * xh * xh * xh * xh;
+          grdr  += 4 * dftb->coeff[izpj][izpk][i][4] * xh * xh * xh;
+          grdr  += 5 * dftb->coeff[izpj][izpk][i][5] * xh * xh * xh * xh;
+        }
+        erep += epair;
+      }
+      /* gradient: +grdr along the unit bond vector for atom k, the opposite for j */
+      dsvmul(grdr / r, bond, tmpgrad);
+      dvec_inc(grad[k], tmpgrad);
+      dsvmul(-grdr / r, bond, tmpgrad);
+      dvec_inc(grad[j], tmpgrad);
+    }
+  }
+  return erep;
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_shift.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_shift.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_shift.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_shift.c	2014-04-28 00:09:46.000000000 +0200
@@ -0,0 +1,59 @@
+#include "qm_dftb.h"
+
+/*
+c=======================================================================
+c      get qdiff and shift-vectors 
+c      (necessary for Hubbard contribution to H matrix elements, total 
+c       energy and forces)
+c
+c      INPUT:
+c      integer     nn                     number of atoms (in one cell)
+c      real*8      qmat(NNDIM)            mulliken charges 
+c      real*8      qzero(MAXTYP)          neutral atomic charges
+c      integer     izp(NNDIM)             map from atoms to atom types
+c      real*8      gammamat(NNDIM,NNDIM)  gamma_{ij} for every atom 
+c                                         pairing i and j (includes 
+c                                         Ewald+shortrange if  
+c                                         periodicity is switched on)
+c      real*8      gammader(NNDIM,NNDIM)  Gamma_{ij)=dU_{ij}/dq_i for 
+c                                         every atom pairing
+c      character*1 sccmode                last term of DFT taylor series
+c                                         which is included 
+c                                         (e.g. 2=2nd order, 3=3rdorder)
+c
+c      OUTPUT:
+c      real*8  qdiff(NNDIM)   net charge of atoms
+c      real*8  shift(NNDIM)   shift(i) = \sum_{j}\Delta q_j \gamma_{ij}
+c      real*8  shift3(NNDIM)  shift3(i) = \sum_{j}\Delta q_i \Delta q_j
+c                                          \Gamma_{ij}
+c      real*8  shift3A(NNDIM) shift3A(i)= \sum_{j}\Delta q_j \Delta q_j
+c                                          \Gamma_{ji}
+c======================================================================
+*/
+
+void hamilshift(int nn, double *qmat, double *qzero, int *izp, double *qdiff, double **gammamat, double **gammader,
+		int sccmode, double *shift, double *shift3, double *shift3a)
+{
+	/* Computes the net charges qdiff[i] = qmat[i] - qzero[izp[i]] and, from
+	 * them, the vectors shift, shift3 and shift3a defined in the comment
+	 * above; shift3 and shift3a are left at zero unless sccmode == 3. */
+	int at, other;
+	double sum, sum3, sum3a;
+
+	for (at = 0; at < nn; at++)
+		qdiff[at] = qmat[at] - qzero[izp[at]];
+
+	for (at = 0; at < nn; at++) {
+		sum = sum3 = sum3a = 0.;
+		for (other = 0; other < nn; other++) {
+			sum += qdiff[other] * gammamat[at][other];
+			if (sccmode == 3) {
+				sum3  += qdiff[other] * gammader[at][other];
+				sum3a += qdiff[other] * qdiff[other] * gammader[other][at];
+			}
+		}
+		shift[at]   = sum;
+		shift3[at]  = (sccmode == 3) ? sum3 * qdiff[at] : 0.;
+		shift3a[at] = sum3a;
+	}
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_skpar.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_skpar.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_skpar.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_skpar.c	2012-09-13 14:25:26.000000000 +0200
@@ -0,0 +1,230 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+// #include"charge_transfer.h"
+#include"qm_dftb.h"
+
+int skspar(int i, int j, double r2, double dd[13],
+           int lmax[DFTB_MAXTYPES], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3],
+           int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES])
+{
+  int maxmax, minmax, in, ind, inu, mxind;
+  double r, grdr, x0, x1, x2, f0, f1, f2, xh, hl;
+  // Evaluates columns inu..9 of the table skstab[i][j] at distance sqrt(r2) into dd; always returns 0 (skhtab and skself are not used).
+  if (lmax[i] < lmax[j]) {
+    maxmax = lmax[j];
+    minmax = lmax[i];
+  } else {
+    maxmax = lmax[i];
+    minmax = lmax[j];
+  }
+  // inu = first of the ten table columns that is evaluated, chosen from the larger/smaller lmax of the pair
+  if (maxmax == 1)
+    inu = 9;
+  if (maxmax == 2)
+    switch (minmax) {
+      case 1: inu = 8;
+	      break;
+      case 2: inu = 5;
+	      break;
+    }
+  if (maxmax == 3)
+    switch (minmax) {
+      case 1: inu = 7;
+	      break;
+      case 2: inu = 3;
+	      break;
+      case 3: inu = 0;
+	      break;
+    }
+  // NOTE(review): inu may stay uninitialised when an lmax value lies outside 1..3
+  // mxind = largest index up to which the spline is continued
+  mxind = dim[i][j] + 0.3/dr[i][j] - 2;
+  // (the double expression above is truncated when assigned to the int mxind)
+  r = sqrt(r2);
+  // ind = r/dr[i][j] truncated to an int: index of the table point at or below r
+  ind = r/dr[i][j];
+
+  if (r2 < 1e-8) { // coinciding atoms: only dd[10..12] are set (to 1.0), dd[0..9] stay untouched
+    for (in=0; in<3; in++)
+      dd[in+10] = 1.0;
+  } else {
+    if (ind > dim[i][j]-3) { // entries ind..ind+2 would run past the last table index dim[i][j]-1
+      // FREE CUBIC SPLINE
+      if (ind == dim[i][j]-2) {
+        x0 = (dim[i][j] - 3) * dr[i][j];
+        x1 = x0 + dr[i][j];
+        x2 = x1 + dr[i][j];
+        xh = r - x1;
+        hl = x2 - x1;
+        for (in=inu; in<10; in++) {
+          f0 = skstab[i][j][dim[i][j]-3][in];
+          f1 = skstab[i][j][dim[i][j]-2][in];
+          f2 = skstab[i][j][dim[i][j]-1][in];
+          dd[in] = cubicspline(f0, f1, f2, x0, x1, xh, hl, dr[i][j]);
+        }
+      } else {
+        if (ind < mxind-1) {
+	// 5TH DEGREE SPLINE
+          x0 = (dim[i][j] - 3) * dr[i][j];
+          x1 = x0 + dr[i][j];
+          x2 = x1 + dr[i][j];
+          // xh is measured from mxind * dr[i][j], the end of the extension
+          xh = r - mxind * dr[i][j];
+	  for (in=inu; in<10; in++) {
+            f0 = skstab[i][j][dim[i][j]-3][in];
+            f1 = skstab[i][j][dim[i][j]-2][in];
+            f2 = skstab[i][j][dim[i][j]-1][in];
+            dd[in] = spline5th(f0, f1, f2, x0, x1, x2, xh, dr[i][j], mxind);
+	  }
+	} else {
+        // ZERO
+	  for (in=inu; in<10; in++)
+	    dd[in] = 0.0;
+        }
+      }
+    } else { // inside the table: three-point interpolation through entries ind, ind+1, ind+2
+      grdr = (r - ind*dr[i][j]) / dr[i][j]; // fractional position of r between entries ind and ind+1
+      for (in=inu; in<10; in++) {
+        f0 = skstab[i][j][ind][in];
+        f1 = skstab[i][j][ind+1][in];
+        f2 = skstab[i][j][ind+2][in];
+        dd[in] = f0 + (f1-f0)*grdr + (f2+f0-2.0*f1)*grdr*(grdr-1.0) / 2.0;
+      }
+    }
+  }
+  return 0;
+}
+
+int skhpar(int i, int j, double r2, double dd[13],
+           int lmax[DFTB_MAXTYPES], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3],
+           int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES])
+{
+  int maxmax, minmax, in, ind, inu, mxind;
+  double r, grdr, x0, x1, x2, f0, f1, f2, xh, hl;
+  // Evaluates columns inu..9 of the table skhtab[i][j] at distance sqrt(r2) into dd; always returns 0 (skstab is not used).
+  if (lmax[i] < lmax[j]) {
+    maxmax = lmax[j];
+    minmax = lmax[i];
+  } else {
+    maxmax = lmax[i];
+    minmax = lmax[j];
+  }
+  // inu = first of the ten table columns that is evaluated, chosen from the larger/smaller lmax of the pair
+  if (maxmax == 1)
+    inu = 9;
+  if (maxmax == 2)
+    switch (minmax) {
+      case 1: inu = 8;
+	      break;
+      case 2: inu = 5;
+	      break;
+    }
+  if (maxmax == 3)
+    switch (minmax) {
+      case 1: inu = 7;
+	      break;
+      case 2: inu = 3;
+	      break;
+      case 3: inu = 0;
+	      break;
+    }
+  // NOTE(review): inu may stay uninitialised when an lmax value lies outside 1..3
+  // mxind = largest index up to which the spline is continued
+  mxind = dim[i][j] + 0.3/dr[i][j] - 2;
+  // (the double expression above is truncated when assigned to the int mxind)
+  r = sqrt(r2);
+  // ind = r/dr[i][j] truncated to an int: index of the table point at or below r
+  ind = r/dr[i][j];
+
+  if (r2 < 1e-8) { // coinciding atoms: only dd[10..12] are set (to skself[i][0..2]), dd[0..9] stay untouched
+    for (in=0; in<3; in++)
+      dd[in+10] = skself[i][in];
+  } else {
+    if (ind > dim[i][j]-3) { // entries ind..ind+2 would run past the last table index dim[i][j]-1
+      // FREE CUBIC SPLINE
+      if (ind == dim[i][j]-2) {
+        x0 = (dim[i][j] - 3) * dr[i][j];
+        x1 = x0 + dr[i][j];
+        x2 = x1 + dr[i][j];
+        xh = r - x1;
+        hl = x2 - x1;
+        for (in=inu; in<10; in++) {
+          f0 = skhtab[i][j][dim[i][j]-3][in];
+          f1 = skhtab[i][j][dim[i][j]-2][in];
+          f2 = skhtab[i][j][dim[i][j]-1][in];
+          dd[in] = cubicspline(f0, f1, f2, x0, x1, xh, hl, dr[i][j]);
+        }
+      } else {
+        if (ind < mxind-1) {
+	// 5TH DEGREE SPLINE
+          x0 = (dim[i][j] - 3) * dr[i][j];
+          x1 = x0 + dr[i][j];
+          x2 = x1 + dr[i][j];
+          // xh is measured from mxind * dr[i][j], the end of the extension
+          xh = r - mxind * dr[i][j];
+	  for (in=inu; in<10; in++) {
+            f0 = skhtab[i][j][dim[i][j]-3][in];
+            f1 = skhtab[i][j][dim[i][j]-2][in];
+            f2 = skhtab[i][j][dim[i][j]-1][in];
+            dd[in] = spline5th(f0, f1, f2, x0, x1, x2, xh, dr[i][j], mxind);
+	  }
+	} else {
+        // ZERO
+	  for (in=inu; in<10; in++)
+	    dd[in] = 0.0;
+        }
+      }
+    } else { // inside the table: three-point interpolation through entries ind, ind+1, ind+2
+      grdr = (r - ind*dr[i][j]) / dr[i][j]; // fractional position of r between entries ind and ind+1
+      for (in=inu; in<10; in++) {
+        f0 = skhtab[i][j][ind][in];
+        f1 = skhtab[i][j][ind+1][in];
+        f2 = skhtab[i][j][ind+2][in];
+        dd[in] = f0 + (f1-f0)*grdr + (f2+f0-2.0*f1)*grdr*(grdr-1.0) / 2.0;
+      }
+    }
+  }
+  return 0;
+}
+
+double cubicspline(double f0, double f1, double f2, double x0, double x1,
+         double xh, double hl, double dr)
+{
+  /* Value at x1 + xh of the cubic a + b*xh + c*xh^2 + d*xh^3 that continues the
+   * tabulated points f0, f1, f2 (spacing dr): a, b, c come from the value and
+   * the finite-difference derivatives at x1, d makes the cubic equal f2 at
+   * xh = hl. The second difference is divided by dr*dr; the former "/ dr*dr"
+   * evaluated as (.../dr)*dr and so dropped the 1/dr^2 factor. */
+  const double f2abl = (f2 + f0 - 2.0*f1) / (dr*dr);       /* second derivative */
+  const double f1abl = (f1 - f0)/dr + 0.5*f2abl*(x1-x0);   /* first derivative at x1 */
+  const double a = f1, b = f1abl, c = f2abl/2.0;
+  const double d = (f2-a)/(hl*hl*hl) - b/(hl*hl) - c/hl;
+  return a + b*xh + c*xh*xh + d*xh*xh*xh;
+}
+
+double spline5th(double f0, double f1, double f2, double x0, double x1, double x2,
+         double xh, double dr, int mxind)
+{
+  /* Returns xh^3 * (hsp + isp*xh + jsp*xh^2). The coefficients are built from
+   * f2 and from the first and second derivative, at x2, of the cubic through
+   * the last table points, such that the result equals f2 at xh = x2 - mxind*dr.
+   * The second difference is divided by dr*dr; the former "/ dr*dr" evaluated
+   * as (.../dr)*dr and so dropped the 1/dr^2 factor. */
+  double hl, f1abl, f2abl, a, b, c, d, hsp, isp, jsp;
+  f2abl = (f2+f0-2.0*f1) / (dr*dr);
+  f1abl = (f1-f0)/dr + 0.5*f2abl*(x1-x0);
+  a = f1;
+  b = f1abl;
+  c = f2abl/2.0;
+  hl = x2-x1;
+  d = (f2-a)/(hl*hl*hl) - b/(hl*hl) - c/hl;
+  f1abl = b + 2.0*c*hl + 3.0*d*hl*hl;   /* first derivative of the cubic at x2 */
+  f2abl = 2.0*c + 6.0*d*hl;             /* second derivative of the cubic at x2 */
+  hl = x2 - mxind*dr;
+  hsp = 10.0*f2/(hl*hl*hl) - 4.0*f1abl/(hl*hl) + f2abl/(2.0*hl);
+  isp = -15.0*f2/(hl*hl*hl*hl) + 7.0*f1abl/(hl*hl*hl) - f2abl/(hl*hl);
+  jsp = 6.0*f2/(hl*hl*hl*hl*hl) - 3.0*f1abl/(hl*hl*hl*hl) + f2abl/(2.0*hl*hl*hl);
+  return (hsp + isp*xh + jsp*xh*xh) * (xh*xh*xh);
+}
+
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_slkode.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_slkode.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_slkode.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_slkode.c	2012-09-13 14:25:25.000000000 +0200
@@ -0,0 +1,127 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+// #include"charge_transfer.h"
+#include"qm_dftb.h"
+
+/* Hamiltonian (ham) and overlap (over) blocks for the atom pair (i, j): both
+ * are cleared and then filled by slkode() from the vector xat[j] - xat[i]. */
+void slkmatrices(int i, int j, dvec *xat,
+         double ham[LDIM][LDIM], double over[LDIM][LDIM],
+         int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+         int *izp, tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3])
+{
+  double bond[3];
+  int c, row, col;
+
+  for (row = 0; row < LDIM; row++)
+    for (col = 0; col < LDIM; col++)
+      ham[row][col] = over[row][col] = 0.0;
+
+  for (c = 0; c < 3; c++)
+    bond[c] = xat[j][c] - xat[i][c];
+
+  /* same transformation twice; only the table lookup routine differs */
+  slkode(bond, izp[i], izp[j], ham, lmax, dim, dr, &skhpar, skstab, skhtab, skself);
+  slkode(bond, izp[i], izp[j], over, lmax, dim, dr, &skspar, skstab, skhtab, skself);
+  return;
+}
+
+void slkode(double dum[3], int i, int j, double em[LDIM][LDIM], int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+               int (*iovpar)(int, int, double, double [13], int [DFTB_MAXTYPES],
+                 tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+                 int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+               tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3])
+{
+  double x[6], x2[6], dummy[LDIM][LDIM], r2, r2i, ri;
+  int k, l, minmax, maxmax;
+  // Fills em for the type pair (i, j) separated by the vector dum; iovpar (skhpar or skspar) looks up the tabulated integrals.
+  r2 = 0.0;
+  for (l=0; l<3; l++) {
+    x[l] = dum[l];
+    x2[l] = x[l]*x[l];
+    r2 += x2[l];
+  }
+  // r2 = |dum|^2 decides between the two-centre branch and the coinciding-atoms branch
+  if (r2 > 1.0e-8) {
+    // x[0..2] becomes the unit vector, x2[0..2] its squared components; both are duplicated into [3..5]
+    r2i = 1.0/r2;
+    ri = sqrt(r2i);
+    for (l=0; l<3; l++) {
+      x[l] *= ri;
+      x[l+3] = x[l];
+      x2[l] *= r2i;
+      x2[l+3] = x2[l];
+    }
+    // maxmax / minmax = larger / smaller lmax of the two types
+    if (lmax[i] < lmax[j]) {
+      maxmax = lmax[j];
+      minmax = lmax[i];
+    } else {
+      maxmax = lmax[i];
+      minmax = lmax[j];
+    }
+    // the s-s element is always set
+    skss(x, x2, i, j, r2, lmax, dim, dr, iovpar, em, skstab, skhtab, skself);
+
+    if (maxmax == 1) return;
+    // p blocks; a call with (j, i) swapped uses dummy as scratch and writes em through its second matrix argument (see sksp)
+    if (minmax >= 2) {
+      skpp(x, x2, i, j, r2, lmax, dim, dr, iovpar, em, skstab, skhtab, skself);
+      sksp(x, x2, i, j, r2, lmax, dim, dr, iovpar, em, em, skstab, skhtab, skself);
+      if (i != j)
+        sksp(x, x2, j, i, r2, lmax, dim, dr, iovpar, dummy, em, skstab, skhtab, skself);
+    } else {
+      if (lmax[j] >= 2)
+        sksp(x, x2, i, j, r2, lmax, dim, dr, iovpar, em, em, skstab, skhtab, skself);
+      else
+        sksp(x, x2, j, i, r2, lmax, dim, dr, iovpar, dummy, em, skstab, skhtab, skself);
+    }
+
+    if (maxmax == 2) return;
+    // d blocks, only reached when maxmax is neither 1 nor 2
+    if (minmax == 3) {
+      skdd(x, x2, i, j, r2, lmax, dim, dr, iovpar, em, skstab, skhtab, skself);
+      sksd(x, x2, i, j, r2, lmax, dim, dr, iovpar, em, em, skstab, skhtab, skself);
+      skpd(x, x2, i, j, r2, lmax, dim, dr, iovpar, em, em, skstab, skhtab, skself);
+      if (i != j) {
+	sksd(x, x2, j, i, r2, lmax, dim, dr, iovpar, dummy, em, skstab, skhtab, skself);
+	skpd(x, x2, j, i, r2, lmax, dim, dr, iovpar, dummy, em, skstab, skhtab, skself);
+      }
+    } else {
+      if (lmax[i] == 1) {
+        sksd(x, x2, i, j, r2, lmax, dim, dr, iovpar, em, em, skstab, skhtab, skself);
+      }
+      if (lmax[i] == 2) {
+        sksd(x, x2, i, j, r2, lmax, dim, dr, iovpar, em, em, skstab, skhtab, skself);
+        skpd(x, x2, i, j, r2, lmax, dim, dr, iovpar, em, em, skstab, skhtab, skself);
+      }
+      if (lmax[j] == 1) {
+        sksd(x, x2, j, i, r2, lmax, dim, dr, iovpar, dummy, em, skstab, skhtab, skself);
+      }
+      if (lmax[j] == 2) {
+        sksd(x, x2, j, i, r2, lmax, dim, dr, iovpar, dummy, em, skstab, skhtab, skself);
+        skpd(x, x2, j, i, r2, lmax, dim, dr, iovpar, dummy, em, skstab, skhtab, skself);
+      }
+    }
+  } else {
+    // coinciding atoms (r2 <= 1e-8): em is cleared and only the self* routines run; an early return for i != j is disabled
+
+    for (k=0; k<LDIM; k++)
+      for (l=0; l<LDIM; l++)
+        em[k][l] = 0.0;
+
+    selfs(i, j, r2, lmax, dim, dr, iovpar, em, skstab, skhtab, skself);
+
+    if (lmax[i] == 1) return;
+
+    selfp(i, j, r2, lmax, dim, dr, iovpar, em, skstab, skhtab, skself);
+
+    if (lmax[i] == 2) return;
+
+    selfd(i, j, r2, lmax, dim, dr, iovpar, em, skstab, skhtab, skself);
+  }
+
+  return;
+}
+
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_slko_levmar.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_slko_levmar.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_slko_levmar.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_slko_levmar.c	2015-02-10 16:59:37.458750138 +0100
@@ -0,0 +1,213 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+#include"qm_dftb.h"
+
+#define ITMAX (1000)
+#define LM_INFO_SZ (10)
+
+int dlevmar_der(void (*func)(double *p, double *hx, int m, int n, void *adata),
+                void (*jacf)(double *p, double *j, int m, int n, void *adata),
+                double *p, double *x, int m, int n, int itmax,
+                double opts[4], double info[LM_INFO_SZ], double *work, double *covar, void *adata);
+
+double slko_value(double *par, double x)
+{
+  /* Model function of the fit: the sum over i = 0..9 of
+   *   par[2+i] * exp(-par[0] * par[1]^(i+1) * x)            */
+  const double alpha = par[0];
+  const double *coef = par + 2;
+  double power = 1., sum = 0.;
+  int term;
+
+  for (term = 0; term < 10; term++) {
+    power *= par[1];
+    sum += coef[term] * exp(-alpha * power * x);
+  }
+  return sum;
+}
+
+double slko_jacobi_a(double *par, double x)
+{
+  /* Derivative of slko_value() with respect to par[0]: term i of the sum is
+   * multiplied by -par[1]^(i+1) * x. */
+  const double alpha = par[0];
+  double power = 1., deriv = 0.;
+  int term;
+
+  for (term = 0; term < 10; term++) {
+    power *= par[1];
+    deriv += par[2 + term] * exp(-alpha * power * x) * (-power * x);
+  }
+
+  return deriv;
+}
+
+double slko_jacobi_b(double *par, double x)
+{
+  /* Derivative of slko_value() with respect to par[1]: term i of the sum is
+   * multiplied by -(i+1) * par[0] * par[1]^i * x. */
+  const double alpha = par[0];
+  double power = 1., deriv = 0.;
+  int term;
+
+  for (term = 0; term < 10; term++) {
+    power *= par[1];
+    deriv += par[2 + term] * exp(-alpha * power * x) * (-(term+1) * alpha * power * x / par[1]);
+  }
+
+  return deriv;
+}
+
+double slko_jacobi_c(double *par, int i, double x)
+{
+  /* Derivative of slko_value() with respect to the coefficient par[2+i]:
+   * exp(-par[0] * par[1]^(i+1) * x). The loop counter is an int now; it used
+   * to be declared double. */
+  double power = 1.;
+  int n;
+
+  for (n = 0; n <= i; n++)
+    power *= par[1];
+  return exp(-par[0] * power * x);
+}
+
+void slko_get_values(double *par, double *hx, int m, int kk, void *adata)
+{
+  /* Model values on the grid x = (k + 50) * 0.02 for k = 0 .. kk-1; the caller
+   * qm_dftb_slko_levmar() passes dftb->dim1[i][j] - 50 data points and 12
+   * parameters. m and adata are not used. */
+  double *out = hx;
+  int k;
+
+  for (k = 0; k < kk; k++)
+    *out++ = slko_value(par, (k + 50) * 0.02);
+}
+
+void slko_get_jacobian(double *par, double *j, int m, int kk, void *adata)
+{
+  /* Analytic Jacobian of slko_get_values() on the same grid x = (k + 50) * 0.02,
+   * k = 0 .. kk-1. The layout is parameter-major: j[p*kk + k] holds the
+   * derivative of hx[k] with respect to par[p], for p = 0 (slko_jacobi_a),
+   * p = 1 (slko_jacobi_b) and p = 2..11 (slko_jacobi_c with index p-2).
+   * m and adata are not used.
+   * NOTE(review): the levmar documentation describes the Jacobian as an n x m
+   * row-major array, i.e. data-point-major (j[k*m + p]); confirm that the
+   * dlevmar_der() in this tree expects the transposed layout written here.
+   */
+  double *out = j;
+  double x;
+  int p, k;
+
+  for (p = 0; p < 12; p++) {
+    for (k = 0; k < kk; k++) {
+      x = (k + 50) * 0.02;
+      if (p == 0)
+        *out++ = slko_jacobi_a(par, x);
+      else if (p == 1)
+        *out++ = slko_jacobi_b(par, x);
+      else
+        *out++ = slko_jacobi_c(par, p - 2, x);
+    }
+  }
+
+  return;
+}
+
+void qm_dftb_slko_levmar(dftb_t *dftb, int i, int j)
+{
+  /* For the element pair (i,j), fits each of the 10 Hamiltonian (skhtab1) and
+   * 10 overlap (skstab1) table columns with the 12-parameter model of
+   * slko_value() through dlevmar_der(), then prints the parameters and a
+   * table-versus-fit comparison. Only entries from index 50 on are fitted; a
+   * column whose entries from index 2 on all lie within +-crit_for_zero is
+   * skipped.
+   * Fixed: the loops filling skshtab_trans initialised kk but tested and
+   * incremented k, so only element 0 was ever written. Tables without any
+   * entry beyond index 50 are now skipped before allocating.
+   */
+  const double crit_for_zero=1.e-10;
+  const int first=50; /* first fitted table index; must match the offset used in slko_get_values() */
+  double par[12]; /* the parameters */
+  double *skshtab_trans; /* temp array for the currently processed data series */
+  int l, all_zero, k, kk, mm, ret, npoints;
+
+  npoints = dftb->dim1[i][j] - first;
+  if (npoints <= 0) {
+    printf("element pair %d-%d: no table entries beyond index %d, not processing\n", i+1, j+1, first);
+    return;
+  }
+
+  /* Hamiltonian */
+  for (l=0; l<10; l++) {
+    /* check if the parameters are zero... */
+    all_zero = 1;
+    for (k=2; k<dftb->dim1[i][j]; k++) {
+      if (dftb->skhtab1[i][j][k][l] > crit_for_zero || dftb->skhtab1[i][j][k][l] < - crit_for_zero) {
+        all_zero = 0;
+        break;
+      }
+    }
+    if (all_zero) {
+      printf("Hamiltonian for element pair %d-%d, column %d: zero, not processing\n", i+1, j+1, l+1);
+      continue;
+    }
+
+    /* copy column l, from index first on, into a contiguous vector for Lev-Mar */
+    snew(skshtab_trans, npoints);
+    for (kk=0; kk<npoints; kk++)
+      skshtab_trans[kk] = dftb->skhtab1[i][j][kk+first][l];
+
+    /* initial values for the parameters */
+    par[0] = 0.5;
+    par[1] = 0.9;
+    for (mm=2; mm<12; mm++)
+      par[mm] = dftb->skhtab1[i][j][first][l] > 0 ? 0.1 : -0.1;
+
+    ret = dlevmar_der(slko_get_values, slko_get_jacobian, par, skshtab_trans, 12, npoints,
+      ITMAX, NULL, NULL, NULL, NULL, NULL);
+
+    printf("Hamil params for elem pair %d-%d, column %d:\n", i+1, j+1, l+1);
+    printf("%d iters, %f %f %f %f %f %f %f %f %f %f %f %f\n", ret,
+      par[0], par[1], par[2], par[3], par[4], par[5], par[6], par[7], par[8], par[9], par[10], par[11]);
+    for (k=first; k<dftb->dim1[i][j]; k++)
+      printf("%5.2f %12.8f %12.8f\n", k*0.02, dftb->skhtab1[i][j][k][l], slko_value(par, k*0.02));
+
+    sfree(skshtab_trans);
+  } /* l */
+
+  /* overlap */
+  for (l=0; l<10; l++) {
+    /* check if the parameters are zero... */
+    all_zero = 1;
+    for (k=2; k<dftb->dim1[i][j]; k++) {
+      if (dftb->skstab1[i][j][k][l] > crit_for_zero || dftb->skstab1[i][j][k][l] < - crit_for_zero) {
+        all_zero = 0;
+        break;
+      }
+    }
+    if (all_zero) {
+      printf("overlap for element pair %d-%d, column %d: zero, not processing\n", i+1, j+1, l+1);
+      continue;
+    }
+
+    /* copy column l, from index first on, into a contiguous vector for Lev-Mar */
+    snew(skshtab_trans, npoints);
+    for (kk=0; kk<npoints; kk++)
+      skshtab_trans[kk] = dftb->skstab1[i][j][kk+first][l];
+
+    /* initial values for the parameters */
+    par[0] = 0.5;
+    par[1] = 0.9;
+    for (mm=2; mm<12; mm++)
+      par[mm] = dftb->skstab1[i][j][first][l] > 0 ? 0.1 : -0.1;
+
+    ret = dlevmar_der(slko_get_values, slko_get_jacobian, par, skshtab_trans, 12, npoints,
+      ITMAX, NULL, NULL, NULL, NULL, NULL);
+    printf("overl params for elem pair %d-%d, column %d:\n", i+1, j+1, l+1);
+    printf("%d iters, %f %f %f %f %f %f %f %f %f %f %f %f\n", ret,
+      par[0], par[1], par[2], par[3], par[4], par[5], par[6], par[7], par[8], par[9], par[10], par[11]);
+    for (k=first; k<dftb->dim1[i][j]; k++)
+      printf("%5.2f %12.8f %12.8f\n", k*0.02, dftb->skstab1[i][j][k][l], slko_value(par, k*0.02));
+    sfree(skshtab_trans);
+  } /* l */
+}
diff -rupN gromacs-5.0/src/gromacs/mdlib/qm_dftb_slktrafo.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_slktrafo.c
--- gromacs-5.0/src/gromacs/mdlib/qm_dftb_slktrafo.c	1970-01-01 01:00:00.000000000 +0100
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qm_dftb_slktrafo.c	2012-09-13 14:25:26.000000000 +0200
@@ -0,0 +1,260 @@
+#include<stdio.h>
+#include<stdlib.h>
+#define Sqrt3 (1.732050808)
+// #include"charge_transfer.h"
+#include"qm_dftb.h"
+/* skss: store parm[9] obtained from iovpar() in em[0][0] (presumably the s-s Slater-Koster term, judging by the name -- confirm). */
+void skss(double x[6], double x2[6], int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double [13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+         int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3])
+{
+  int id; /* receives the return value of iovpar(); not read afterwards */
+  double parm[13];
+  /* parm[] is written only by iovpar(); x and x2 are accepted but not used in this routine */
+  id = iovpar(i, j, r2, parm, lmax, skstab, skhtab, skself, dim, dr);
+  em[0][0] = parm[9];
+  return;
+}
+/* sksp: set em[0][1..3] = x[l] * parm[8] and emt[1..3][0] to the negated values (presumably the s-p term -- confirm). */
+void sksp(double x[6], double x2[6], int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double [13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+         int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], double emt[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3])
+{
+  int l, id; /* id receives the return value of iovpar(); not read afterwards */
+  double parm[13];
+  /* parm[] is written only by iovpar(); x2 is accepted but not used in this routine */
+  id = iovpar(i, j, r2, parm, lmax, skstab, skhtab, skself, dim, dr);
+  for (l=0; l<3; l++) {
+    em[0][1+l] = x[l] * parm[8];
+    emt[1+l][0] = -em[0][1+l]; /* transposed position in emt, opposite sign */
+    /* disabled alternative that used the transposed index order: */
+    // em[1+l][0] = x[l] * parm[8];
+    // emt[0][1+l] = -em[1+l][0];
+  }
+  return;
+}
+/* sksd: set em[0][4..8] and emt[4..8][0] to es[l] * parm[7], same sign in both (presumably the s-d term -- confirm). */
+void sksd(double x[6], double x2[6], int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double [13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+         int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], double emt[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3])
+{
+  int l, id; /* id receives the return value of iovpar(); not read afterwards */
+  double parm[13], es[5], d4, d5;
+  /* d4, d5: combinations of the x2[] components used for es[4] and es[3] below */
+  d4 = x2[ZZ] - 0.5 * (x2[XX] + x2[YY]);
+  d5 = x2[XX] - x2[YY];
+  id = iovpar(i, j, r2, parm, lmax, skstab, skhtab, skself, dim, dr);
+  for (l=0; l<3; l++)
+    es[l] = Sqrt3 * x[l] * x[l+1]; /* l=2 reads x[3]; assumes x[] continues cyclically beyond index 2 -- TODO confirm */
+  es[3] = 0.5 * Sqrt3 * d5;
+  es[4] = d4;
+  for (l=0; l<5; l++)
+    emt[4+l][0] = /* chained assignment: emt and em receive the same value */
+      em[0][4+l] = es[l] * parm[7];
+  return;
+}
+/* skpp: fill the symmetric block em[1..3][1..3] from parm[5], parm[6] and x/x2 (presumably the p-p term -- confirm). */
+void skpp(double x[6], double x2[6], int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double [13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3], 
+         int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3])
+{
+  int k, l, ii, ir, is, id; /* id receives the return value of iovpar(); not read afterwards */
+  double parm[13], epp[6], hp, dm[6];
+  /* parm[] is written only by iovpar() */
+  id = iovpar(i, j, r2, parm, lmax, skstab, skhtab, skself, dim, dr);
+  for (l=0; l<3; l++) {
+    epp[l] = x2[l];
+    epp[l+3] = x[l] * x[l+1]; /* l=2 reads x[3]; assumes x[] continues cyclically beyond index 2 -- TODO confirm */
+  }
+  for (l=0; l<3; l++) { /* dm[0..2]: values for the diagonal of the block */
+    hp = epp[l];
+    dm[l] = hp * parm[5] + (1.0-hp) * parm[6];
+  }
+  for (l=3; l<6; l++) { /* dm[3..5]: values for the off-diagonal entries */
+    dm[l] = epp[l] * (parm[5] - parm[6]);
+  }
+  for (ir=0; ir<3; ir++)
+    for (is=0; is<=ir; is++) {
+      ii = ir - is; /* distance from the diagonal */
+      k = 3*ii - (ii*(ii-1))/2 + is; /* packed index: k=0..2 diagonal, k=3..4 first off-diagonal, k=5 second */
+      em[1+is][1+ir] = em[1+ir][1+is] = dm[k]; /* written symmetrically */
+    }
+  return;
+}
+/* skpd: fill em[1..3][4..8] from parm[3], parm[4] and x/x2; emt[4..8][1..3] receives the negated, transposed values (presumably the p-d term -- confirm). */
+void skpd(double x[6], double x2[6], int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double [13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+         int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], double emt[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3])
+{
+  int k, l, m, id, ir, is; /* id receives the return value of iovpar(); not read afterwards */
+  double parm[13], epd[13][2], dm[15], d3, d4, d5, d6;
+  /* d3..d6: combinations of the x2[] and x[] components reused in the coefficient table below */
+  d3 = x2[XX] + x2[YY];
+  d4 = x2[ZZ] - 0.5 * d3;
+  d5 = x2[XX] - x2[YY];
+  d6 = x[XX] * x[YY] * x[ZZ];
+  id = iovpar(i, j, r2, parm, lmax, skstab, skhtab, skself, dim, dr);
+  /* epd[n][m] multiplies parm[m+3]; x[l+1], x[l+2] reach x[3], x[4] (assumes x[] continues cyclically -- TODO confirm) */
+  for (l=0; l<3; l++) {
+	  epd[l][0] = Sqrt3 * x2[l] * x[l+1];
+	  epd[l][1] = x[l+1] * (1. - 2. * x2[l]);
+	  epd[l+4][0] = Sqrt3 * x2[l] * x[l+2];
+	  epd[l+4][1] = x[l+2] * (1. - 2. * x2[l]);
+	  epd[l+7][0] = 0.5 * Sqrt3 * x[l] * d5;
+	  epd[l+10][0] = x[l] * d4;
+  }
+  epd[3][0] = Sqrt3 * d6;
+  epd[3][1] = -2. * d6;
+  epd[7][1] = x[XX] * (1. - d5); /* the [..][1] entries of rows 7..12 are set individually here */
+  epd[8][1] = -x[YY] * (1. + d5);
+  epd[9][1] = -x[ZZ] * d5;
+  epd[10][1] = - Sqrt3 * x[XX] * x2[ZZ];
+  epd[11][1] = - Sqrt3 * x[YY] * x2[ZZ];
+  epd[12][1] = Sqrt3 * x[ZZ] * d3;
+  for (l=0; l<15; l++)
+	  dm[l] = 0.;
+  for (m=0; m<2; m++) { /* accumulate dm[] over the two parameters parm[3] and parm[4] */
+	  dm[0] += epd[0][m] * parm[m+3];
+	  dm[1] += epd[5][m] * parm[m+3];
+	  dm[2] += epd[3][m] * parm[m+3];
+	  dm[4] += epd[1][m] * parm[m+3];
+	  dm[5] += epd[6][m] * parm[m+3];
+	  dm[6] += epd[4][m] * parm[m+3];
+	  dm[8] += epd[2][m] * parm[m+3];
+	  for (l=7; l<13; l++)
+		  dm[l+2] += epd[l][m] * parm[m+3];
+  }
+  dm[3] = dm[2]; /* dm[3] and dm[7] are not accumulated above; they reuse dm[2] */
+  dm[7] = dm[2];
+  for (ir=0; ir<5; ir++)
+	  for (is=0; is<3; is++) {
+		  k = 3 * ir + is; /* dm[] is read as a 5x3 table, row ir, column is */
+		  emt[4+ir][1+is] = -dm[k];
+		  em[1+is][4+ir] = dm[k];
+	  }
+  return;
+}
+/* skdd: fill the symmetric block em[4..8][4..8] from parm[0..2] and x/x2 (presumably the d-d term -- confirm). */
+void skdd(double x[6], double x2[6], int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double [13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+         int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3])
+{
+  int k, l, m, id, ii, ir, is; /* id receives the return value of iovpar(); not read afterwards */
+  double parm[13], e[15][3], dm[15], dd[3], d3, d4, d5, d6;
+  /* d6 is declared but never used in this routine */
+  d3 = x2[XX] + x2[YY];
+  d4 = x2[ZZ] - 0.5 * d3;
+  d5 = x2[XX] - x2[YY];
+  id = iovpar(i, j, r2, parm, lmax, skstab, skhtab, skself, dim, dr);
+  /* e[n][m] is the coefficient that multiplies parm[m] (m = 0..2) in dm[n], see the accumulation loop below */
+  for (l=0; l<3; l++) {
+	  e[l][0] = x2[l] * x2[l+1]; /* x2[l+1], x2[l+2] reach x2[3], x2[4]; assumes x2[] continues cyclically -- TODO confirm */
+	  e[l][1] = x2[l] + x2[l+1] - 4. * e[l][0];
+	  e[l][2] = x2[l+2] + e[l][0];
+	  e[l][0] *= 3.; /* scaled last because the two lines above use the unscaled product */
+  }
+  e[3][0] = SQR(d5);
+  e[3][1] = d3 - e[3][0];
+  e[3][2] = x2[ZZ] + 0.25 * e[3][0];
+  e[3][0] *= 0.75; /* scaled last because the two lines above use the unscaled square */
+  e[4][0] = SQR(d4);
+  e[4][1] = 3. * x2[ZZ] * d3;
+  e[4][2] = 0.75 * SQR(d3);
+  dd[0] = x[XX] * x[ZZ];
+  dd[1] = x[YY] * x[XX];
+  dd[2] = x[ZZ] * x[YY];
+  for (l=0; l<2; l++) {
+	  e[l+5][0] = 3. * x2[l+1] * dd[l];
+	  e[l+5][1] = dd[l] * (1. - 4. * x2[l+1]);
+	  e[l+5][2] = dd[l] * (x2[l+1] - 1.);
+  }
+  e[7][0] = dd[0] * d5 * 1.5;
+  e[7][1] = dd[0] * (1. - 2. * d5);
+  e[7][2] = dd[0] * (0.5 * d5 - 1.);
+  e[8][0] = 0.5 * Sqrt3 * d5 * d4;
+  e[8][1] = -Sqrt3 * d5 * x2[ZZ];
+  e[8][2] = 0.25 * Sqrt3 * d5 * (1. + x2[ZZ]);
+  e[9][0] = 3. * x2[XX] * dd[2];
+  e[9][1] = 4. * (0.25 - x2[XX]) * dd[2];
+  e[9][2] = (x2[XX] - 1.) * dd[2];
+  e[10][0] = 1.5 * dd[2] * d5;
+  e[10][1] = -dd[2] * (1. + 2. * d5);
+  e[10][2] = dd[2] * (1. + 0.5 * d5);
+  e[12][2] = 0.5 * d5 * dd[1];
+  e[12][1] = -2. * dd[1] * d5;
+  e[12][0] = 3. * e[12][2]; /* depends on e[12][2] assigned two lines above */
+  e[11][0] = Sqrt3 * d4 * dd[0];
+  e[13][0] = Sqrt3 * d4 * dd[2];
+  e[14][0] = Sqrt3 * d4 * dd[1];
+  e[14][1] = -2. * Sqrt3 * dd[1] * x2[ZZ];
+  e[14][2] = 0.5 * Sqrt3 * (1. + x2[ZZ]) * dd[1];
+  e[13][1] = Sqrt3 * dd[2] * (d3 - x2[ZZ]);
+  e[13][2] = -0.5 * Sqrt3 * dd[2] * d3;
+  e[11][1] = Sqrt3 * dd[0] * (d3 - x2[ZZ]);
+  e[11][2] = -0.5 * Sqrt3 * dd[0] * d3;
+  for (l=0; l<15; l++) {
+	  dm[l] = 0.;
+	  for (m=0; m<3; m++)
+		  dm[l] += e[l][m] * parm[m];
+  }
+  for (ir=0; ir<5; ir++)
+	  for (is=0; is<=ir; is++) {
+		  ii = ir-is; /* distance from the diagonal */
+		  k = 5*ii - ii*(ii-1)/2 + is; /* packed index: k=0..4 diagonal, 5..8, 9..11, 12..13, 14 for ii=1..4 */
+		  em[4+ir][4+is] = em[4+is][4+ir] = dm[k]; /* written symmetrically */
+	  }
+  return;
+}
+/* selfs: store parm[12] obtained from iovpar() in em[0][0] (presumably the on-site s value -- confirm). */
+void selfs(int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double [13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+         int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3])
+{
+  int id; /* receives the return value of iovpar(); not read afterwards */
+  double parm[13];
+  /* parm[] is written only by iovpar() */
+  id = iovpar(i, j, r2, parm, lmax, skstab, skhtab, skself, dim, dr);
+  em[0][0] = parm[12];
+  return;
+}
+/* selfp: make em[1..3][1..3] a diagonal block with parm[11] on the diagonal (presumably the on-site p value -- confirm). */
+void selfp(int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double [13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+         int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3])
+{
+  int l, m, id; /* id receives the return value of iovpar(); not read afterwards */
+  double parm[13];
+  /* parm[] is written only by iovpar() */
+  id = iovpar(i, j, r2, parm, lmax, skstab, skhtab, skself, dim, dr);
+  for (l=0; l<3; l++) {
+    for (m=0; m<3; m++)
+      em[1+m][1+l] = 0.0; /* clear column 1+l of the block first */
+    em[1+l][1+l] = parm[11]; /* then set its diagonal entry */
+  }
+  return;
+}
+/* selfd: make em[4..8][4..8] a diagonal block with parm[10] on the diagonal (presumably the on-site d value -- confirm). */
+void selfd(int i, int j, double r2, int lmax[DFTB_MAXTYPES], int dim[DFTB_MAXTYPES][DFTB_MAXTYPES], double dr[DFTB_MAXTYPES][DFTB_MAXTYPES],
+       int (*iovpar)(int, int, double, double [13], int [DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *[DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][3],
+         int [DFTB_MAXTYPES][DFTB_MAXTYPES], double [DFTB_MAXTYPES][DFTB_MAXTYPES]),
+       double em[LDIM][LDIM], tendoubles *skstab[DFTB_MAXTYPES][DFTB_MAXTYPES], tendoubles *skhtab[DFTB_MAXTYPES][DFTB_MAXTYPES], double skself[DFTB_MAXTYPES][3])
+{
+  int l, m, id; /* id receives the return value of iovpar(); not read afterwards */
+  double parm[13];
+  /* parm[] is written only by iovpar() */
+  id = iovpar(i, j, r2, parm, lmax, skstab, skhtab, skself, dim, dr);
+  for (l=0; l<5; l++) {
+    for (m=0; m<5; m++)
+      em[4+m][4+l] = 0.0; /* clear column 4+l of the block first */
+    em[4+l][4+l] = parm[10]; /* then set its diagonal entry */
+  }
+  return;
+}
+
diff -rupN gromacs-5.0/src/gromacs/mdlib/qmmm.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qmmm.c
--- gromacs-5.0/src/gromacs/mdlib/qmmm.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/qmmm.c	2014-10-15 02:01:08.000000000 +0200
@@ -118,6 +118,11 @@ real
 call_orca(t_forcerec *fr, t_QMrec *qm,
           t_MMrec *mm, rvec f[], rvec fshift[]);
 
+#elif defined GMX_QMMM_DFTB
+/* DFTB source code */
+
+#include "qm_dftb.h"
+
 #endif
 
 
@@ -154,7 +159,7 @@ static int QMlayer_comp(const void *a, c
 } /* QMlayer_comp */
 
 real call_QMroutine(t_commrec gmx_unused *cr, t_forcerec gmx_unused *fr, t_QMrec gmx_unused *qm,
-                    t_MMrec gmx_unused *mm, rvec gmx_unused f[], rvec gmx_unused fshift[])
+                    t_MMrec gmx_unused *mm, rvec gmx_unused f[], rvec gmx_unused fshift[], matrix box)
 {
     /* makes a call to the requested QM routine (qm->QMmethod)
      * Note that f is actually the gradient, i.e. -f
@@ -162,6 +167,14 @@ real call_QMroutine(t_commrec gmx_unused
     real
         QMener = 0.0;
 
+#ifdef GMX_QMMM_DFTB
+    /* call the DFTB energy calculation
+     */
+    prepare_dftb(qm, mm);
+    QMener = run_dftb1(qm->dftb, f, fshift, cr, fr, box);
+
+#else
+
     /* do a semi-empiprical calculation */
 
     if (qm->QMmethod < eQMmethodRHF && !(mm->nrMMatoms))
@@ -203,6 +216,8 @@ real call_QMroutine(t_commrec gmx_unused
 #endif
         }
     }
+#endif /* DFTB */
+
     return (QMener);
 }
 
@@ -210,6 +225,12 @@ void init_QMroutine(t_commrec gmx_unused
 {
     /* makes a call to the requested QM routine (qm->QMmethod)
      */
+
+#ifdef GMX_QMMM_DFTB
+    /* initialize DFTB?
+     */
+#else
+
     if (qm->QMmethod < eQMmethodRHF)
     {
 #ifdef GMX_QMMM_MOPAC
@@ -232,6 +253,7 @@ void init_QMroutine(t_commrec gmx_unused
         gmx_fatal(FARGS, "Ab-initio calculation only supported with Gamess, Gaussian or ORCA.");
 #endif
     }
+#endif /* DFTB */
 } /* init_QMroutine */
 
 void update_QMMM_coord(rvec x[], t_forcerec *fr, t_QMrec *qm, t_MMrec *mm)
@@ -253,7 +275,12 @@ void update_QMMM_coord(rvec x[], t_force
      */
     for (i = 0; i < mm->nrMMatoms; i++)
     {
-        rvec_sub(x[mm->indexMM[i]], fr->shift_vec[mm->shiftMM[i]], mm->xMM[i]);
+        /* do not do it!
+         * it would break the calculation of the surface correction
+         * in the Ewald sum!
+         */
+        //rvec_sub(x[mm->indexMM[i]], fr->shift_vec[mm->shiftMM[i]], mm->xMM[i]);
+        copy_rvec(x[mm->indexMM[i]], mm->xMM[i]);
     }
 } /* update_QMMM_coord */
 
@@ -302,7 +329,7 @@ static void punch_QMMM_excl(t_QMrec *qm,
         }
         fprintf(out, "\n");
     }
-    free(excluded);
+    free(excluded); /* TODO - should this be better sfree()? */
     fclose(out);
 } /* punch_QMMM_excl */
 
@@ -464,6 +491,210 @@ t_QMMMrec *mk_QMMMrec(void)
 
 } /* mk_QMMMrec */
 
+#ifdef GMX_QMMM_DFTB
+
+void init_QMMMrec(t_commrec  *cr,
+                  gmx_mtop_t *mtop,
+                  t_inputrec *ir,
+                  t_forcerec *fr)
+{
+    /* DFTB variant: build the single QM record and the MM record in fr->qr.
+     * Atoms whose QMMM group number is 0 become the QM region (their topology
+     * charges are zeroed), all remaining atoms go to the MM record, the DFTB
+     * options are copied from ir and init_dftb() is called.
+     */
+
+    gmx_groups_t            *groups;
+    atom_id                 *qm_arr = NULL, vsite, ai, aj;
+    int                      qm_max = 0, qm_nr = 0, i, j, jmax, k, l, nrvsite2 = 0;
+    t_QMMMrec               *qr;
+    t_MMrec                 *mm;
+    t_iatom                 *iatoms;
+    gmx_mtop_atomloop_all_t  aloop;
+    t_atom                  *atom,
+                            *mm_atom;
+    gmx_mtop_ilistloop_all_t iloop;
+    int                      a_offset;
+    t_ilist                 *ilist_mol;
+    gmx_mtop_atomlookup_t    alook;
+    int                      found_element;
+    int                      is_mm_atom, found_mm_atoms;
+
+    /* only warn (instead of aborting) if the user runs with more than one node */
+    if (PAR(cr))
+    {
+        /* gmx_fatal(FARGS, "QM/MM does not work in parallel, use a single node instead\n"); */
+        /* Tomas Kubar - try out anyway! */
+        fprintf(stderr, "\nQM/MM possibly does not work in parallel, calculation on your own risk :-)\n\n");
+    }
+
+    /* Local alias for the QMMMrec owned by the forcerec (a pointer, not a copy) */
+    qr = fr->qr;
+
+    /* The indices of the QM atoms are first gathered in the temporary
+     * array qm_arr (grown in steps of 1000 entries) and then handed to
+     * init_QMrec() below.
+     */
+
+    qr->QMMMscheme     = ir->QMMMscheme;
+
+    /* This DFTB variant supports exactly one QM group; an input with
+     * more than one QM group is rejected below.
+     */
+    /* All atoms in QMMM group 0 form the QM region. Every other atom of
+     * the system is treated as MM, see the loop that fills indexMM
+     * further down.
+     */
+
+    /* NOTE (inherited): small problem if there is only QM.... so no MM */
+
+    if (ir->opts.ngQM > 1) {
+        /* parallel runs are allowed above, so abort through gmx_fatal() rather than a bare exit() */
+        gmx_fatal(FARGS, "QM/MM with DFTB cannot calculate more than 1 group of atoms at the moment");
+    }
+    qr->nrQMlayers = 1;
+
+    groups = &mtop->groups;
+
+    snew(qr->qm, 1);
+
+    aloop = gmx_mtop_atomloop_all_init(mtop);
+    while (gmx_mtop_atomloop_all_next(aloop, &i, &atom))
+    {
+        if (qm_nr >= qm_max)
+        {
+            qm_max += 1000;
+            srenew(qm_arr, qm_max);
+        }
+        if (ggrpnr(groups, egcQMMM, i) == 0)
+        {
+            /* hack for tip4p */
+            qm_arr[qm_nr++] = i;
+        }
+    }
+
+    /* standard QMMM, all layers are merged together so there is one QM
+     * subsystem and one MM subsystem.
+     * Also we set the charges (q and qB) of the QM atoms to zero in the
+     * topology to prevent the innerloops from doubly counting the electrostatic QM MM interaction
+     */
+
+    alook = gmx_mtop_atomlookup_init(mtop);
+
+    for (k = 0; k < qm_nr; k++)
+    {
+        gmx_mtop_atomnr_to_atom(alook, qm_arr[k], &atom);
+        atom->q  = 0.0;
+        atom->qB = 0.0;
+    }
+    qr->qm[0] = mk_QMrec();
+    /* store QM atoms in the QMrec and initialise
+     */
+    init_QMrec(0, qr->qm[0], qm_nr, qm_arr, mtop, ir);
+
+    /* find frontier atoms and mark them true in the frontatoms array.
+     */
+    for (i = 0; i < qm_nr; i++)
+    {
+        gmx_mtop_atomnr_to_ilist(alook, qm_arr[i], &ilist_mol, &a_offset);
+        nrvsite2 = ilist_mol[F_VSITE2].nr;
+        iatoms   = ilist_mol[F_VSITE2].iatoms;
+
+        for (k = 0; k < nrvsite2; k += 4)
+        {
+            vsite = a_offset + iatoms[k+1]; /* the vsite         */
+            ai    = a_offset + iatoms[k+2]; /* constructing atom */
+            aj    = a_offset + iatoms[k+3]; /* constructing atom */
+            if (ggrpnr(groups, egcQMMM, ai) < (groups->grps[egcQMMM].nr-1) &&
+                (ggrpnr(groups, egcQMMM, aj) >= (groups->grps[egcQMMM].nr-1)))
+            {
+                /* mark ai as frontier atom */
+                if ( (qm_arr[i] == ai) || (qm_arr[i] == vsite) )
+                {
+                    qr->qm[0]->frontatoms[i] = TRUE;
+                }
+            }
+            else if (ggrpnr(groups, egcQMMM, aj) < (groups->grps[egcQMMM].nr-1) &&
+                     (ggrpnr(groups, egcQMMM, ai) >= (groups->grps[egcQMMM].nr-1)))
+            {
+                /* mark aj as frontier atom */
+                if ( (qm_arr[i] == aj) || (qm_arr[i] == vsite) )
+                {
+                    qr->qm[0]->frontatoms[i] = TRUE;
+                }
+            }
+        }
+    }
+
+    gmx_mtop_atomlookup_destroy(alook);
+
+    /* MM rec creation */
+    mm               = mk_MMrec();
+    mm->scalefactor  = ir->scalefactor;
+    mm->nrMMatoms    = (mtop->natoms)-(qr->qm[0]->nrQMatoms); /* rest of the atoms */
+    snew(mm->indexMM,    mm->nrMMatoms);
+    snew(mm->xMM,        mm->nrMMatoms);
+    snew(mm->MMcharges,  mm->nrMMatoms);
+    snew(mm->shiftMM,    mm->nrMMatoms);
+    snew(mm->MMatomtype, mm->nrMMatoms);
+    qr->mm           = mm;
+
+    /* fill the indexMM array with every atom index that is not listed in indexQM */
+    found_mm_atoms = 0;
+    for (i=0; i<mtop->natoms; i++) {
+      is_mm_atom = 1;
+      for (j=0; j<qr->qm[0]->nrQMatoms; j++)
+        if (i == qr->qm[0]->indexQM[j])
+          is_mm_atom = 0;
+      if (is_mm_atom) {
+        mm->indexMM[found_mm_atoms] = i;
+        found_mm_atoms++;
+      }
+    }
+
+    /* DFTB: report the atom counts */
+    printf ("(mtop->natoms) = %d\n(qr->qm[0]->nrQMatoms) = %d\nmm->nrMMatoms = %d\n",(mtop->natoms),(qr->qm[0]->nrQMatoms),mm->nrMMatoms);
+    printf ("(found_mm_atoms) = %d\n", found_mm_atoms);
+
+    /* DFTB initialization here. NOTE(review): the strcpy() calls below are unbounded -- confirm the destination buffer sizes */
+    qr->qm[0]->dftbsccmode = ir->QMdftbsccmode;
+    qr->qm[0]->dftbtelec = (double) ir->QMdftbtelec;
+    strcpy(qr->qm[0]->dftbslkopath, ir->QMdftbslkopath);
+    strcpy(qr->qm[0]->dftbslkoseparator, ir->QMdftbslkoseparator);
+    qr->qm[0]->dftbslkolowercase = ir->QMdftbslkolowercase;
+    strcpy(qr->qm[0]->dftbslkosuffix, ir->QMdftbslkosuffix);
+    qr->qm[0]->dftbpartialpme = ir->QMdftbpartialpme;
+    qr->qm[0]->dftbdispersion = ir->QMdftbdispersion;
+    qr->qm[0]->dftbcdko = ir->QMdftbcdko;
+    qr->qm[0]->dftbmmhubinf = ir->QMdftbmmhubinf;
+    init_dftb(qr->qm[0], qr->mm, ir);
+
+    /* DFTB with CDKO: map every MM atom to the index of its element in dftb->element[] (assumes extcharges <= nrMMatoms -- TODO confirm) */
+    if (ir->QMdftbcdko) {
+      alook = gmx_mtop_atomlookup_init(mtop);
+      snew(qr->qm[0]->dftb->mm_element, qr->qm[0]->dftb->extcharges);
+      for (i=0; i<qr->qm[0]->dftb->extcharges; i++) {
+        gmx_mtop_atomnr_to_atom(alook, qr->mm->indexMM[i], &mm_atom);
+        found_element = 0;
+        for (j=0; j<qr->qm[0]->dftb->elements; j++) {
+          if (qr->qm[0]->dftb->element[j] == mtop->atomtypes.atomnumber[mm_atom->type]) {
+            qr->qm[0]->dftb->mm_element[i] = j;
+            found_element = 1;
+          }
+        }
+        if (! found_element) {
+          gmx_fatal(FARGS, "The atomic number %d was found for MM atom no. %d, but not in the DFTB parameter files provided!",
+                    mtop->atomtypes.atomnumber[mm_atom->type], qr->mm->indexMM[i]+1);
+        }
+      }
+      gmx_mtop_atomlookup_destroy(alook); /* release the lookup created at the top of this block */
+    } /* CDKO */
+
+    return;
+} /* init_QMMMrec */
+
+#else
+
 void init_QMMMrec(t_commrec  *cr,
                   gmx_mtop_t *mtop,
                   t_inputrec *ir,
@@ -778,6 +1009,108 @@ void init_QMMMrec(t_commrec  *cr,
     }
 } /* init_QMMMrec */
 
+#endif
+
+#ifdef GMX_QMMM_DFTB
+void update_QMMMrec(t_commrec      *cr,
+                    t_forcerec     *fr,
+                    rvec            x[],
+                    t_mdatoms      *md,
+                    matrix          box,
+                    gmx_localtop_t gmx_unused *top)
+{
+    /* DFTB variant: recompute the shift indices of the QM and MM atoms,
+     * refresh the scaled MM charges and finally call update_QMMM_coord()
+     * to store the coordinates in the QMMMrec.
+     * NOTE (inherited): is NOT yet working if there are no PBC. Also in
+     * ns.c, simple ns needs to be fixed!
+     */
+    int
+        i;
+    t_QMMMrec
+       *qr;
+    rvec
+        dx, crd;
+    t_QMrec
+       *qm;
+    t_MMrec
+       *mm;
+    t_pbc
+        pbc;
+
+    /* copy some pointers */
+    qr          = fr->qr;
+    qm          = qr->qm[0]; /* this variant only ever uses the first QM record */
+    mm          = qr->mm;
+
+    /* set up the pbc struct; the domain decomposition record is passed only when DOMAINDECOMP(cr) is set */
+    set_pbc_dd(&pbc, fr->ePBC, DOMAINDECOMP(cr) ? cr->dd : NULL, FALSE, box);
+
+    /* This variant updates the following QMMMrec entries:
+     *
+     * 1) shiftQM, the shift index of every QM atom relative to QM atom 0
+     *
+     * 2) shiftMM, the shift index of every MM atom relative to QM atom 0
+     *
+     * 3) MMcharges, the MM charges multiplied by mm->scalefactor
+     *
+     * indexMM is not touched in this function.
+     *
+     * (inherited remark) the shifts are used for computing virial of the QM/MM particles.
+     */
+
+    qm->shiftQM[0] = XYZ2IS(0, 0, 0);
+    for (i = 1; i < qm->nrQMatoms; i++)
+    {
+        qm->shiftQM[i] = pbc_dx_aiuc(&pbc, x[qm->indexQM[0]], x[qm->indexQM[i]], dx); /* dx itself is not used afterwards */
+    }
+
+    /* debug output, currently enabled: lists all QM atom indices on every call */
+    printf("There are %d QM atoms, namely:", qm->nrQMatoms);
+    for (i=0; i<qm->nrQMatoms; i++)
+      printf(" %d", qm->indexQM[i]);
+    printf("\n");
+    /* end of debug output */
+
+    /* compute the shift for the MM atoms with respect to
+     * the QM atom [0] and store them; crd is the position of QM atom 0
+     * minus fr->shift_vec[shiftQM[0]], with shiftQM[0] = XYZ2IS(0, 0, 0) */
+    rvec_sub(x[qm->indexQM[0]], fr->shift_vec[qm->shiftQM[0]], crd);
+    for (i=0; i<mm->nrMMatoms; i++) {
+        mm->shiftMM[i] = pbc_dx_aiuc(&pbc, crd, x[mm->indexMM[i]], dx);
+    }
+
+    /* previous version of the loop
+    for (i=0; i<mm->nrMMatoms; i++) {
+        current_shift = pbc_dx_aiuc(&pbc, x[qm->indexQM[0]], x[mm->indexMM[i]], dx);
+
+        crd[0] = IS2X(qm->shiftQM[0]) + IS2X(current_shift);
+        crd[1] = IS2Y(qm->shiftQM[0]) + IS2Y(current_shift);
+        crd[2] = IS2Z(qm->shiftQM[0]) + IS2Z(current_shift);
+        is     = XYZ2IS(crd[0], crd[1], crd[2]);
+        mm->shiftMM[i] = is;
+    }
+    */
+
+    /* printf("\n"); */
+    /* printf("Line 1008: mm_nr = %d\n", mm_nr); */
+
+    for (i = 0; i < mm->nrMMatoms; i++) /* only chargeA is read: no free energy yet */
+    {
+        mm->MMcharges[i] = md->chargeA[mm->indexMM[i]] * mm->scalefactor;
+    }
+
+    /* the next routine fills the coordinate fields in the QMMM rec of
+     * both the quantum atoms and the MM atoms.
+     * NOTE(review): whether the shifts calculated above are applied is decided inside update_QMMM_coord() -- confirm there.
+     */
+    update_QMMM_coord(x, fr, qr->qm[0], qr->mm);
+
+    return;
+} /* update_QMMM_rec */
+
+#else
+
 void update_QMMMrec(t_commrec      *cr,
                     t_forcerec     *fr,
                     rvec            x[],
@@ -1077,11 +1410,12 @@ void update_QMMMrec(t_commrec      *cr,
         }
     }
 } /* update_QMMM_rec */
-
+#endif
 
 real calculate_QMMM(t_commrec *cr,
                     rvec x[], rvec f[],
-                    t_forcerec *fr)
+                    t_forcerec *fr,
+                    matrix box)
 {
     real
         QMener = 0.0;
@@ -1113,7 +1447,7 @@ real calculate_QMMM(t_commrec *cr,
         qm = qr->qm[0];
         snew(forces, (qm->nrQMatoms+mm->nrMMatoms));
         snew(fshift, (qm->nrQMatoms+mm->nrMMatoms));
-        QMener = call_QMroutine(cr, fr, qm, mm, forces, fshift);
+        QMener = call_QMroutine(cr, fr, qm, mm, forces, fshift, box);
         for (i = 0; i < qm->nrQMatoms; i++)
         {
             for (j = 0; j < DIM; j++)
@@ -1131,8 +1465,8 @@ real calculate_QMMM(t_commrec *cr,
             }
 
         }
-        free(forces);
-        free(fshift);
+        sfree(forces);
+        sfree(fshift);
     }
     else                                       /* Multi-layer ONIOM */
     {
@@ -1160,13 +1494,13 @@ real calculate_QMMM(t_commrec *cr,
             srenew(fshift, qm->nrQMatoms);
             /* we need to re-initialize the QMroutine every step... */
             init_QMroutine(cr, qm, mm);
-            QMener += call_QMroutine(cr, fr, qm, mm, forces, fshift);
+            QMener += call_QMroutine(cr, fr, qm, mm, forces, fshift, box);
 
             /* this layer at the lower level of theory */
             srenew(forces2, qm->nrQMatoms);
             srenew(fshift2, qm->nrQMatoms);
             init_QMroutine(cr, qm2, mm);
-            QMener -= call_QMroutine(cr, fr, qm2, mm, forces2, fshift2);
+            QMener -= call_QMroutine(cr, fr, qm2, mm, forces2, fshift2, box);
             /* E = E1high-E1low The next layer includes the current layer at
              * the lower level of theory, which provides + E2low
              * this is similar for gradients
@@ -1179,14 +1513,14 @@ real calculate_QMMM(t_commrec *cr,
                     fr->fshift[qm->shiftQM[i]][j] += (fshift[i][j]-fshift2[i][j]);
                 }
             }
-            free(qm2);
+            free(qm2); /* TODO - sfree()? */
         }
         /* now the last layer still needs to be done: */
         qm      = qr->qm[qr->nrQMlayers-1]; /* C counts from 0 */
         init_QMroutine(cr, qm, mm);
         srenew(forces, qm->nrQMatoms);
         srenew(fshift, qm->nrQMatoms);
-        QMener += call_QMroutine(cr, fr, qm, mm, forces, fshift);
+        QMener += call_QMroutine(cr, fr, qm, mm, forces, fshift, box);
         for (i = 0; i < qm->nrQMatoms; i++)
         {
             for (j = 0; j < DIM; j++)
@@ -1195,7 +1529,7 @@ real calculate_QMMM(t_commrec *cr,
                 fr->fshift[qm->shiftQM[i]][j] += fshift[i][j];
             }
         }
-        free(forces);
+        free(forces); /* TODO - sfree()? */
         free(fshift);
         free(forces2);
         free(fshift2);
diff -rupN gromacs-5.0/src/gromacs/mdlib/sim_util.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/sim_util.c
--- gromacs-5.0/src/gromacs/mdlib/sim_util.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/sim_util.c	2014-09-08 00:41:30.000000000 +0200
@@ -1326,10 +1326,20 @@ void do_force_cutsVERLET(FILE *fplog, t_
     }
 
     /* update QMMMrec, if necessary */
+#pragma omp single
+    {
     if (fr->bQMMM)
     {
+#ifndef GMX_QMMM_DFTB
+        if (bNS)
+        {
+	    /* neighborsearching first -- in this prelim version, use the old, group-based thing */
+	    ns_qmmm(fr, box, groups, top, mdatoms, cr, nrnb, bFillGrid);
+	}
+#endif
         update_QMMMrec(cr, fr, x, mdatoms, box, top);
     }
+    }
 
     if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
     {
@@ -1873,10 +1883,13 @@ void do_force_cutsGROUP(FILE *fplog, t_c
     }
 
     /* update QMMMrec, if necessary */
+#pragma omp single
+    {
     if (fr->bQMMM)
     {
         update_QMMMrec(cr, fr, x, mdatoms, box, top);
     }
+    }
 
     if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
     {
diff -rupN gromacs-5.0/src/gromacs/mdlib/tables.c gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/tables.c
--- gromacs-5.0/src/gromacs/mdlib/tables.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/gromacs/mdlib/tables.c	2014-08-19 22:14:33.000000000 +0200
@@ -55,6 +55,8 @@
 #include "macros.h"
 #include "tables.h"
 
+#include <stdio.h>
+
 /* All the possible (implemented) table functions */
 enum {
     etabLJ6,
@@ -296,17 +298,26 @@ void table_spline3_fill_ewald_lr(real
         /* Copy to FDV0 table too. Allocation occurs in forcerec.c,
          * init_ewald_f_table().
          */
+        printf("SAVING EWALD TABLE\n");
+        FILE *file;
+        file = fopen("ewald_table", "w");
         for (i = 0; i < ntab-1; i++)
         {
             table_fdv0[4*i]     = table_f[i];
             table_fdv0[4*i+1]   = table_f[i+1]-table_f[i];
             table_fdv0[4*i+2]   = table_v[i];
             table_fdv0[4*i+3]   = 0.0;
+            fprintf(file, "%6d %16.8f\n%6d %16.8f\n%6d %16.8f\n%6d %16.8f\n",
+              4*i, table_fdv0[4*i], 4*i+1, table_fdv0[4*i+1], 4*i+2, table_fdv0[4*i+2], 4*i+3, table_fdv0[4*i+3]);
         }
         table_fdv0[4*(ntab-1)]    = table_f[(ntab-1)];
         table_fdv0[4*(ntab-1)+1]  = -table_f[(ntab-1)];
         table_fdv0[4*(ntab-1)+2]  = table_v[(ntab-1)];
         table_fdv0[4*(ntab-1)+3]  = 0.0;
+            i=ntab-1;
+            fprintf(file, "%6d %16.8f\n%6d %16.8f\n%6d %16.8f\n%6d %16.8f\n",
+              4*i, table_fdv0[4*i], 4*i+1, table_fdv0[4*i+1], 4*i+2, table_fdv0[4*i+2], 4*i+3, table_fdv0[4*i+3]);
+        fclose(file);
     }
 }
 
diff -rupN gromacs-5.0/src/programs/mdrun/md.c gromacs-5.0-dftb-v6-plumed/src/programs/mdrun/md.c
--- gromacs-5.0/src/programs/mdrun/md.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/programs/mdrun/md.c	2014-10-23 16:59:10.000000000 +0200
@@ -96,6 +96,12 @@
 #include "gromacs/swap/swapcoords.h"
 #include "gromacs/imd/imd.h"
 
+/* PLUMED */
+#include "../../../Plumed.h"
+extern int    plumedswitch;
+extern plumed plumedmain;
+/* END PLUMED */
+
 #ifdef GMX_FAHCORE
 #include "corewrap.h"
 #endif
@@ -224,6 +230,11 @@ double do_md(FILE *fplog, t_commrec *cr,
     /* Interactive MD */
     gmx_bool          bIMDstep = FALSE;
 
+    /* PLUMED */
+    int plumedNeedsEnergy=0;
+    int plumedWantsToStop=0;
+    /* END PLUMED */
+
 #ifdef GMX_FAHCORE
     /* Temporary addition for FAHCORE checkpointing */
     int chkpt_ret;
@@ -651,6 +662,48 @@ double do_md(FILE *fplog, t_commrec *cr,
         fprintf(fplog, "\n");
     }
 
+    /* PLUMED */
+    if(plumedswitch){
+      /* detect plumed API version */
+      int pversion=0;
+      plumed_cmd(plumedmain,"getApiVersion",&pversion);
+      /* setting kbT is only implemented with API version > 1 */
+      real kbT=ir->opts.ref_t[0]*BOLTZ;
+      if(pversion>1) plumed_cmd(plumedmain,"setKbT",&kbT);
+
+      if(cr->ms && cr->ms->nsim>1) {
+        if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters);
+        if(PAR(cr)){
+          if(DOMAINDECOMP(cr)) {
+            plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all);
+          }else{
+            plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim);
+          }
+        }
+        plumed_cmd(plumedmain,"GREX init",NULL);
+      }
+      if(PAR(cr)){
+        if(DOMAINDECOMP(cr)) {
+          plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all);
+        }
+      }
+      plumed_cmd(plumedmain,"setNatoms",&top_global->natoms);
+      plumed_cmd(plumedmain,"setMDEngine","gromacs");
+      plumed_cmd(plumedmain,"setLog",fplog);
+      real real_delta_t;
+      real_delta_t=ir->delta_t;
+      plumed_cmd(plumedmain,"setTimestep",&real_delta_t);
+      plumed_cmd(plumedmain,"init",NULL);
+
+      if(PAR(cr)){
+        if(DOMAINDECOMP(cr)) {
+          plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home);
+          plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex);
+        }
+      }
+    }
+    /* END PLUMED */
+
     walltime_accounting_start(walltime_accounting);
     wallcycle_start(wcycle, ewcRUN);
     print_start(fplog, cr, walltime_accounting, "mdrun");
@@ -672,6 +725,37 @@ double do_md(FILE *fplog, t_commrec *cr,
      *
      ************************************************************/
 
+    /* QM/MM DFTB: output streams for the QM charges written in the MD loop
+     * and for the optional QM and MM coordinate dumps. A file that cannot
+     * be opened is a fatal error, so that no NULL stream reaches fprintf().
+     */
+    FILE *f_qm_dftb_charges=NULL, *f_qm_dftb_qm_qxyz=NULL, *f_qm_dftb_mm_qxyz=NULL;
+    int counter_tom;
+    if (fr->bQMMM)
+    {
+        f_qm_dftb_charges = fopen("qm_dftb_charges.xvg", "w");
+        if (f_qm_dftb_charges == NULL)
+        {
+            gmx_fatal(FARGS, "Cannot open file qm_dftb_charges.xvg for writing");
+        }
+        if (fr->qr->qm[0]->dftb->output_qm_freq > 0)
+        {
+            f_qm_dftb_qm_qxyz = fopen("qm.qxyz", "w");
+            if (f_qm_dftb_qm_qxyz == NULL)
+            {
+                gmx_fatal(FARGS, "Cannot open file qm.qxyz for writing");
+            }
+        }
+        if (fr->qr->qm[0]->dftb->output_mm_freq > 0)
+        {
+            f_qm_dftb_mm_qxyz = fopen("mm.qxyz", "w");
+            if (f_qm_dftb_mm_qxyz == NULL)
+            {
+                gmx_fatal(FARGS, "Cannot open file mm.qxyz for writing");
+            }
+        }
+    }
+
     /* if rerunMD then read coordinates and velocities from input trajectory */
     if (bRerunMD)
     {
@@ -955,6 +1020,13 @@ double do_md(FILE *fplog, t_commrec *cr,
                                     do_verbose && !bPMETuneRunning);
                 wallcycle_stop(wcycle, ewcDOMDEC);
                 /* If using an iterative integrator, reallocate space to match the decomposition */
+
+                /* PLUMED */
+                if(plumedswitch){
+                  plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home);
+                  plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex);
+                }
+                /* END PLUMED */
             }
         }
 
@@ -1078,12 +1150,90 @@ double do_md(FILE *fplog, t_commrec *cr,
              * This is parallellized as well, and does communication too.
              * Check comments in sim_util.c
              */
+
+            /* PLUMED */
+            plumedNeedsEnergy=0;
+            if(plumedswitch){
+              long int lstep=step; plumed_cmd(plumedmain,"setStepLong",&lstep);
+              plumed_cmd(plumedmain,"setPositions",&state->x[0][0]);
+              plumed_cmd(plumedmain,"setMasses",&mdatoms->massT[0]);
+              plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[0]);
+              plumed_cmd(plumedmain,"setBox",&state->box[0][0]);
+              plumed_cmd(plumedmain,"prepareCalc",NULL);
+              plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop);
+              plumed_cmd(plumedmain,"setForces",&f[0][0]);
+              plumed_cmd(plumedmain,"setVirial",&force_vir[0][0]);
+              plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
+            }
+            /* END PLUMED */
+
             do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups,
                      state->box, state->x, &state->hist,
                      f, force_vir, mdatoms, enerd, fcd,
                      state->lambda, graph,
                      fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii,
                      (bNS ? GMX_FORCE_NS : 0) | force_flags);
+
+            /* PLUMED */
+            if(plumedswitch){
+              if(plumedNeedsEnergy){
+                plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]);
+                plumed_cmd(plumedmain,"performCalc",NULL);
+              }
+              if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
+                 do_per_step(step,repl_ex_nst)) plumed_cmd(plumedmain,"GREX savePositions",NULL);
+              if(plumedWantsToStop) ir->nsteps=step_rel+1;
+            }
+            /* END PLUMED */
+
+            /* QM/MM - DFTB */
+            if (fr->bQMMM)
+            {
+              /* DFTB charges output */
+              fprintf(f_qm_dftb_charges, "%14.4f", t);
+              for (counter_tom=0; counter_tom < fr->qr->qm[0]->dftb->phase1.nn; counter_tom++)
+                fprintf(f_qm_dftb_charges, "%10.6f", -fr->qr->qm[0]->dftb->phase1.qmat[counter_tom]
+                                                     + fr->qr->qm[0]->dftb->qzero1[fr->qr->qm[0]->dftb->phase1.izp[counter_tom]]);
+              //for (counter_tom=0; counter_tom < fr->qr->qm[0]->dftb->phase1.nn; counter_tom++)
+              //  fprintf(f_qm_dftb_charges, "%10.4f", fr->qr->qm[0]->dftb->phase1.pot[counter_tom]);
+              //for (counter_tom=0; counter_tom < fr->qr->qm[0]->dftb->phase1.nn; counter_tom++)
+              //  fprintf(f_qm_dftb_charges, "%10.6f", fr->qr->qm[0]->dftb->phase1.shiftE[counter_tom]);
+              fprintf(f_qm_dftb_charges, "\n");
+              /* DFTB coordinates output; element symbols are looked up by atomic number in periodic_system */
+              if (fr->qr->qm[0]->dftb->output_qm_freq > 0 && step % fr->qr->qm[0]->dftb->output_qm_freq == 0) {
+                   char periodic_system[37][3]={"XX","h", "he",
+                                   "li","be","b", "c", "n", "o", "f", "ne",
+                                   "na","mg","al","si","p", "s", "cl","ar",
+                                   "k", "ca","sc","ti","v", "cr","mn","fe","co",
+                                   "ni","cu","zn","ga","ge","as","se","br","kr"};
+                fprintf(f_qm_dftb_qm_qxyz, "%d\nQM coordinates and charges step %lld\n", fr->qr->qm[0]->dftb->phase1.nn, (long long) step); /* cast makes the argument match %lld */
+                for (counter_tom=0; counter_tom < fr->qr->qm[0]->dftb->phase1.nn; counter_tom++)
+                  fprintf(f_qm_dftb_qm_qxyz, "%-2s %10.5f%10.5f%10.5f %10.7f\n",
+                    periodic_system[(fr->qr->qm[0]->atomicnumberQM[counter_tom] >= 0 && fr->qr->qm[0]->atomicnumberQM[counter_tom] < 37)
+                                    ? fr->qr->qm[0]->atomicnumberQM[counter_tom] : 0], /* outside the 37-entry table -> "XX" */
+                    fr->qr->qm[0]->dftb->phase1.x[counter_tom][0] * BOHR2NM * 10.,
+                    fr->qr->qm[0]->dftb->phase1.x[counter_tom][1] * BOHR2NM * 10.,
+                    fr->qr->qm[0]->dftb->phase1.x[counter_tom][2] * BOHR2NM * 10.,
+                    -fr->qr->qm[0]->dftb->phase1.qmat[counter_tom] + fr->qr->qm[0]->dftb->qzero1[fr->qr->qm[0]->dftb->phase1.izp[counter_tom]]);
+              }
+              if (fr->qr->qm[0]->dftb->output_mm_freq > 0 && step % fr->qr->qm[0]->dftb->output_mm_freq == 0) {
+                fprintf(f_qm_dftb_mm_qxyz, "%d\nMM coordinates and charges step %lld\n", fr->qr->qm[0]->dftb->phase1.ne, (long long) step); /* cast makes the argument match %lld */
+                for (counter_tom=0; counter_tom < fr->qr->qm[0]->dftb->phase1.ne; counter_tom++)
+                  fprintf(f_qm_dftb_mm_qxyz, "%10.7f%10.5f%10.5f%10.5f\n",
+                    fr->qr->qm[0]->dftb->phase1.ze[counter_tom],
+                    fr->qr->qm[0]->dftb->phase1.xe[counter_tom][0] * BOHR2NM * 10.,
+                    fr->qr->qm[0]->dftb->phase1.xe[counter_tom][1] * BOHR2NM * 10.,
+                    fr->qr->qm[0]->dftb->phase1.xe[counter_tom][2] * BOHR2NM * 10.);
+              }
+              //for (counter_tom=0; counter_tom < fr->qr->qm[0]->dftb->phase1.nn; counter_tom++)
+              //  printf("grad atom %2d: %10.5f%10.5f%10.5f\n", counter_tom+1,
+              //    fr->qr->qm[0]->dftb->phase1.grad[counter_tom][0], fr->qr->qm[0]->dftb->phase1.grad[counter_tom][1], fr->qr->qm[0]->dftb->phase1.grad[counter_tom][2]);
+              //for (counter_tom=0; counter_tom < fr->qr->qm[0]->dftb->phase1.ne; counter_tom++)
+              //  if (dnorm2(fr->qr->qm[0]->dftb->phase1.mmgrad[counter_tom]) > 0.00001)
+              //  printf("grad MMat %2d: %10.5f%10.5f%10.5f\n", counter_tom+1,
+              //    fr->qr->qm[0]->dftb->phase1.mmgrad[counter_tom][0], fr->qr->qm[0]->dftb->phase1.mmgrad[counter_tom][1], fr->qr->qm[0]->dftb->phase1.mmgrad[counter_tom][2]);
+            }
+
         }
 
         if (bVV && !bStartingFromCpt && !bRerunMD)
@@ -1962,6 +2112,11 @@ double do_md(FILE *fplog, t_commrec *cr,
     /* End of main MD loop */
     debug_gmx();
 
+    /* QM/MM - DFTB */
+    if (f_qm_dftb_charges) fclose(f_qm_dftb_charges);
+    if (f_qm_dftb_qm_qxyz) fclose(f_qm_dftb_qm_qxyz);
+    if (f_qm_dftb_mm_qxyz) fclose(f_qm_dftb_mm_qxyz);
+
     /* Stop measuring walltime */
     walltime_accounting_end(walltime_accounting);
 
diff -rupN gromacs-5.0/src/programs/mdrun/mdrun.cpp gromacs-5.0-dftb-v6-plumed/src/programs/mdrun/mdrun.cpp
--- gromacs-5.0/src/programs/mdrun/mdrun.cpp	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/programs/mdrun/mdrun.cpp	2014-10-23 17:01:49.000000000 +0200
@@ -56,6 +56,13 @@
 #include "gromacs/commandline/pargs.h"
 #include "gromacs/fileio/filenm.h"
 
+/* PLUMED */
+#include "../../../Plumed.h"
+extern int    plumedswitch;
+extern plumed plumedmain; 
+extern void(*plumedcmd)(plumed,const char*,const void*);
+/* END PLUMED */
+
 int gmx_mdrun(int argc, char *argv[])
 {
     const char   *desc[] = {
@@ -428,6 +435,7 @@ int gmx_mdrun(int argc, char *argv[])
         { efMTX, "-mtx",    "nm",       ffOPTWR },
         { efNDX, "-dn",     "dipole",   ffOPTWR },
         { efRND, "-multidir", NULL,      ffOPTRDMULT},
+        { efDAT, "-plumed", "plumed",   ffOPTRD },   /* PLUMED */
         { efDAT, "-membed", "membed",   ffOPTRD },
         { efTOP, "-mp",     "membed",   ffOPTRD },
         { efNDX, "-mn",     "membed",   ffOPTRD },
@@ -780,6 +788,32 @@ int gmx_mdrun(int argc, char *argv[])
     ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
     ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
 
+    /* PLUMED */
+    /* PLUMED is switched on only when the -plumed input file option was given */
+    plumedswitch = opt2bSet("-plumed",NFILE,fnm) ? 1 : 0;
+    if(plumedswitch){
+      plumedcmd=plumed_cmd;
+      /* handed to PLUMED through "setRealPrecision" below */
+      int real_precision=sizeof(real);
+      real energyUnits=1.0;
+      real lengthUnits=1.0;
+      real timeUnits=1.0;
+
+      if(!plumed_installed()){
+        gmx_fatal(FARGS,"Plumed is not available. Check your PLUMED_KERNEL variable.");
+      }
+      plumedmain=plumed_create();
+      plumed_cmd(plumedmain,"setRealPrecision",&real_precision);
+      // this is not necessary for gromacs units:
+      plumed_cmd(plumedmain,"setMDEnergyUnits",&energyUnits);
+      plumed_cmd(plumedmain,"setMDLengthUnits",&lengthUnits);
+      plumed_cmd(plumedmain,"setMDTimeUnits",&timeUnits);
+      /* Look the file up by option name: -membed is declared with the same
+       * efDAT file type as -plumed, so a lookup by type alone is ambiguous. */
+      plumed_cmd(plumedmain,"setPlumedDat",opt2fn("-plumed",NFILE,fnm));
+    }
+    /* END PLUMED */
+
     rc = mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact,
                   nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr,
                   dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz,
@@ -788,6 +822,12 @@ int gmx_mdrun(int argc, char *argv[])
                   nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed,
                   pforce, cpt_period, max_hours, deviceOptions, imdport, Flags);
 
+    /* PLUMED */
+    if(plumedswitch){
+      plumed_finalize(plumedmain);
+    }
+    /* END PLUMED */
+
     /* Log file has to be closed in mdrunner if we are appending to it
        (fplog not set here) */
     if (MASTER(cr) && !bAppendFiles)
diff -rupN gromacs-5.0/src/programs/mdrun/pme_loadbal.c gromacs-5.0-dftb-v6-plumed/src/programs/mdrun/pme_loadbal.c
--- gromacs-5.0/src/programs/mdrun/pme_loadbal.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/programs/mdrun/pme_loadbal.c	2014-09-08 16:15:49.000000000 +0200
@@ -718,7 +718,7 @@ gmx_bool pme_load_balance(pme_load_balan
      * But we do with hybrid acceleration and with free energy.
      * To avoid bugs, we always re-initialize the simple tables here.
      */
-    init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab);
+    init_interaction_const_tables(NULL, ic, bUsesSimpleTables, rtab); //, state->box);
 
     if (cr->duty & DUTY_PME)
     {
diff -rupN gromacs-5.0/src/programs/mdrun/repl_ex.c gromacs-5.0-dftb-v6-plumed/src/programs/mdrun/repl_ex.c
--- gromacs-5.0/src/programs/mdrun/repl_ex.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/programs/mdrun/repl_ex.c	2014-10-23 17:05:11.000000000 +0200
@@ -51,6 +51,12 @@
 #include "domdec.h"
 #include "gromacs/random/random.h"
 
+/* PLUMED */
+#include "../../../Plumed.h"
+extern int    plumedswitch;
+extern plumed plumedmain;
+/* END PLUMED */
+
 #define PROBABILITYCUTOFF 100
 /* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */
 
@@ -982,6 +988,10 @@ test_for_replica_exchange(FILE
         pind[i] = re->ind[i];
     }
 
+    /* PLUMED */
+    int plumed_test_exchange_pattern=0;
+    /* END PLUMED */
+
     if (bMultiEx)
     {
         /* multiple random switch exchange */
@@ -1057,6 +1067,33 @@ test_for_replica_exchange(FILE
         /* standard nearest neighbor replica exchange */
 
         m = (step / re->nst) % 2;
+
+        /* PLUMED */
+        if(plumedswitch){
+          int partner=re->repl;
+          plumed_cmd(plumedmain,"getExchangesFlag",&plumed_test_exchange_pattern);
+          if(plumed_test_exchange_pattern>0){
+            int *list;
+            snew(list,re->nrepl);
+            plumed_cmd(plumedmain,"setNumberOfReplicas",&(re->nrepl));
+            plumed_cmd(plumedmain,"getExchangesList",list);
+            for(i=0; i<re->nrepl; i++) re->ind[i]=list[i];
+            sfree(list);
+          }
+
+          for(i=1; i<re->nrepl; i++) {
+            if (i % 2 != m) continue;
+            a = re->ind[i-1];
+            b = re->ind[i];
+            if(re->repl==a) partner=b;
+            if(re->repl==b) partner=a;
+          }
+          plumed_cmd(plumedmain,"GREX setPartner",&partner);
+          plumed_cmd(plumedmain,"GREX calculate",NULL);
+          plumed_cmd(plumedmain,"GREX shareAllDeltaBias",NULL);
+        }
+        /* END PLUMED */
+
         for (i = 1; i < re->nrepl; i++)
         {
             a = re->ind[i-1];
@@ -1066,6 +1103,20 @@ test_for_replica_exchange(FILE
             if (i % 2 == m)
             {
                 delta = calc_delta(fplog, bPrint, re, a, b, a, b);
+
+                /* PLUMED */
+                if(plumedswitch){
+                  real adb,bdb,dplumed;
+                  char buf[300];
+                  sprintf(buf,"GREX getDeltaBias %d",a); plumed_cmd(plumedmain,buf,&adb);
+                  sprintf(buf,"GREX getDeltaBias %d",b); plumed_cmd(plumedmain,buf,&bdb);
+                  dplumed=adb*re->beta[a]+bdb*re->beta[b];
+                  delta+=dplumed;
+                  if (bPrint)
+                    fprintf(fplog,"dplumed = %10.3e  dE_Term = %10.3e (kT)\n",dplumed,delta);
+                }
+                /* END PLUMED */
+
                 if (delta <= 0)
                 {
                     /* accepted */
@@ -1092,11 +1143,22 @@ test_for_replica_exchange(FILE
 
                 if (bEx[i])
                 {
+                  /* PLUMED */
+                  if(!plumed_test_exchange_pattern) {
+                    /* standard neighbour swapping */
                     /* swap these two */
                     tmp       = pind[i-1];
                     pind[i-1] = pind[i];
                     pind[i]   = tmp;
                     re->nexchange[i]++;  /* statistics for back compatibility */
+                  } else {
+                    /* alternative swapping patterns */
+                    tmp       = pind[a];
+                    pind[a]   = pind[b];
+                    pind[b]   = tmp;
+                    re->nexchange[i]++;  /* statistics for back compatibility */
+                  }
+                  /* END PLUMED */
                 }
             }
             else
@@ -1112,6 +1174,15 @@ test_for_replica_exchange(FILE
         re->nattempt[m]++;
     }
 
+    /* PLUMED */
+    if(plumed_test_exchange_pattern>0) {
+      for (i = 0; i < re->nrepl; i++)
+      {
+          re->ind[i] = i;
+      }
+    }
+    /* END PLUMED */
+
     /* record which moves were made and accepted */
     for (i = 0; i < re->nrepl; i++)
     {
@@ -1316,6 +1387,10 @@ gmx_bool replica_exchange(FILE *fplog, c
     /* The order in which multiple exchanges will occur. */
     gmx_bool bThisReplicaExchanged = FALSE;
 
+    /* PLUMED */
+    if(plumedswitch)plumed_cmd(plumedmain,"GREX prepare",NULL);
+    /* END PLUMED */
+
     if (MASTER(cr))
     {
         replica_id  = re->repl;
diff -rupN gromacs-5.0/src/programs/mdrun/runner.c gromacs-5.0-dftb-v6-plumed/src/programs/mdrun/runner.c
--- gromacs-5.0/src/programs/mdrun/runner.c	2014-06-29 17:33:50.000000000 +0200
+++ gromacs-5.0-dftb-v6-plumed/src/programs/mdrun/runner.c	2014-08-18 12:20:41.000000000 +0200
@@ -1526,14 +1526,18 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
 #endif
 
     /* Check and update hw_opt for the cut-off scheme */
+    /* Tomas Kubar disabled this:
     check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);
+    */
 
     gmx_omp_nthreads_init(fplog, cr,
                           hwinfo->nthreads_hw_avail,
                           hw_opt->nthreads_omp,
                           hw_opt->nthreads_omp_pme,
                           (cr->duty & DUTY_PP) == 0,
+                          TRUE); /* Tomas Kubar - let us try this for ecutsGROUP as well
                           inputrec->cutoff_scheme == ecutsVERLET);
+                          */
 
     if (PAR(cr))
     {