#include "dprec.h" subroutine check_skin(crd,savcrd,numatoms,recip,ucell, $ skinnb,result,maxdis) C C************************************************************************ C AMBER ** C ** C Copyright (c) 1986, 1991, 1995, 1997, 1999 ** C Regents of the University of California ** C All Rights Reserved. ** C ** C This software provided pursuant to a license agreement containing ** C restrictions on its disclosure, duplication, and use. This software ** C contains confidential and proprietary information, and may not be ** C extracted or distributed, in whole or in part, for any purpose ** C whatsoever, without the express written permission of the authors. ** C This notice, and the associated author list, must be attached to ** C all copies, or extracts, of this software. Any additional ** C restrictions set forth in the license agreement also apply to this ** C software. ** C************************************************************************ C c--------------------------------------------------------------------- c --- CHECK_SKIN ----- c this routine checks if any atom has moved more than half the c skin distance c--------------------------------------------------------------------- implicit none #include "ew_cntrl.h" integer numatoms,result _REAL_ crd(3,numatoms),savcrd(3,numatoms), $ recip(3,3),ucell(3,3),skinnb,maxdis integer n _REAL_ dx,dy,dz,dfx,dfy,dfz,dis2,maxdis2 _REAL_ dnint maxdis2 = 0.0d0 do n = 1,numatoms dx = crd(1,n) - savcrd(1,n) dy = crd(2,n) - savcrd(2,n) dz = crd(3,n) - savcrd(3,n) c c ---columns of recip are reciprocal unit cell vecs c dfx = dx * recip(1,1) + dy * recip(2,1) + dz * recip(3,1) dfy = dx * recip(1,2) + dy * recip(2,2) + dz * recip(3,2) dfz = dx * recip(1,3) + dy * recip(2,3) + dz * recip(3,3) dfx = dfx - dnint(dfx) dfy = dfy - dnint(dfy) dfz = dfz - dnint(dfz) c c ---columns of ucell are direct unit cell vecs c dx = dfx * ucell(1,1) + dfy * ucell(1,2) + dfz * ucell(1,3) dy = dfx * ucell(2,1) + dfy * ucell(2,2) + dfz * ucell(2,3) dz = dfx * ucell(3,1) + dfy * ucell(3,2) + dfz * ucell(3,3) dis2 = dx**2 + dy**2 + dz**2 maxdis2 = max(dis2,maxdis2) end do maxdis = sqrt(maxdis2) c C if anybody moves more than half the nbskin added to cutoff C in generating the verlet list, update the list c result = 1 if ( maxdis2 .lt. 0.25d0 * skinnb * skinnb) result = 0 return end c--------------------------------------------------------------------- c c --- EWALD_LIST --- c c ...handles set-up and error checking for calling of c get_nb_list which creates the nonbond list... c c---------------------------------------------------------------------- subroutine ewald_list(crd,iac,ico,iblo,inb,ntypes, $ numatoms,X,IX,r_stack,i_stack, $ ipairs,ntnb,ibelly,belly,newbalance #ifdef ROWAT $ ,v,vold,ntp,xr #endif $ ) implicit none c integer ntnb integer ipairs(*) _REAL_ crd(3,*), r_stack(*) integer i_stack(*) integer numatoms,iac(*),ico(*),iblo(*),inb(*),ntypes _REAL_ X(*) integer IX(*),ibelly(*) integer newbalance integer ngrd1,ngrd2,ngrd3 integer igridpairs,sizgrdprs integer last_numlist,listdiff,listdiffmax,isiz logical belly,trial,balance #include "extra.h" #include "ew_cntrl.h" #include "ew_unitcell.h" #include "ew_localnb.h" #include "ew_mpole.h" c #ifdef MPI # include "parallel.h" # include "ew_parallel.h" # ifdef MPI_DOUBLE_PRECISION # undef MPI_DOUBLE_PRECISION # endif # include "mpif.h" # ifdef CRAY_PVP # define MPI_DOUBLE_PRECISION MPI_REAL8 # endif integer tmplist(0:MPI_MAX_PROCESSORS), . alllist(0:MPI_MAX_PROCESSORS) #else /* not parallel needs numtasks and mytaskid */ integer numtasks,mytaskid #endif c #include "ew_numtasks.h" #include "ew_tran.h" #include "def_time.h" C c c ARGUMENTS: (all are input) c CRD is the array holding atomic coords. c IAC and ICO are used to look up vdw and hbond interactions. c ICO is an NTYPES by NTYPES array giving lookup into VDW and HBOND c coefficients. c IAC(i) is the atom type of atom i. The coefficients c for 6-12 interactions for atoms i and j are indexed by c ICO(iac(i),iac(j)). In practice ICO is unrolled to a 1 c dimensional array. They are needed here to split nonbond c list into vdw,hbonds (see pack_nb_list()) c IBLO and INB are used to mask out some nonbond interactions c (bonded pairs,1-3,1-4 pairs) etc. IBLO(i) stores the number c of atoms masked by atom i. INB stores their indices. The c masked pairs are stored back to back in INB. c For our purposes we need to produce our own variants. c See "load_mask". c NTYPES is used with IAC and ICO c X and IX are real and integer arrays which comprise the total "dynamic" c memory for amber; coords,forces,bond lists etc are accessed as c offsets in them. c integer ifail,listtot,listtotall,indtop,issetup integer nucgrd1_0,nucgrd2_0,nucgrd3_0 save nucgrd1_0,nucgrd2_0,nucgrd3_0,issetup integer inddel,result,i _REAL_ maxdis logical first data first /.true./ #ifdef ROWAT integer l_tmpcrd,l_tmpvel,l_tmpxr,l_tmpdp,l_tmpvel2 integer istart,inext,ilast,inwat,itmpro _REAL_ v(*),xr(*),vold(*) integer ntp #endif save trial,balance save last_numlist,listdiffmax c data issetup/0/ c c---------------------------- code starts here --------------------------- #ifndef MPI mytaskid=0 numtasks=1 #endif c --------------------------------------------------------------------- c ------ setup: c -------- If issetup is not set to 1 then this is the c ------- first time through, do the setup stuff c ------ do not set the issetup flag till after the c ----- if(issetup .eq. 1) block c if ( issetup .eq. 0 ) then call ew_startup(numatoms,iblo,inb,X,IX, $ iac,ntypes,ix(ixtran),r_stack) nucgrd1_0=-1 nucgrd2_0=-1 nucgrd3_0=-1 call save_crds(crd,X(lsavcrd),numatoms) balance=.false. newbalance=0 if(periodic.eq.0) balance= (numtasks.gt.1) if(balance)newbalance=2 last_numlist=0 endif trial=newbalance.gt.0 c c--------------------------------------------------------------------- c ------- SKIN (BUFFER) CHECK see if new list is required c ------- Do this block except on first pass. ----------- c ------ For nocutoff, the list will build once c ----- by skipping over this block to the build list part if ( issetup .eq. 1 )then c c ------- if the nocutoff flag is set, then there is c ------ no need to update the list, all pairs are c ----- already determined c if(nocutoff)return c c ----- the skin check is done every step. c ---- in old amber method (pre skin), update only when ntnb .ne. 0 c --- this logic is retained if nbflag = 0 c -- do not do any of this the first step, i.e. until issetup = 1 c c if nbflag=0, use old logic; i.e. only update if ntnb .ne. 0 c if nbflag=1, the skin check determines it c if ( nbflag .eq. 0 )then if ( ntnb .eq. 0 ) return if ( nbtell .eq. 1 )write(6,101)ntnb 101 format(1x,'OLD LIST LOGIC; ntnb = ',i2) #ifndef ROWAT else lastlist = lastlist + 1 call timer_start(TIME_skin) call check_skin(crd,X(lsavcrd),numatoms, $ recip,ucell,skinnb,result,maxdis) call timer_stop(TIME_skin) c c if nbtell=1, report information: c if ( master. and. result .eq. 1 .and. nbtell .eq. 1 )then write(6,100)maxdis,lastlist 100 format(1x,'CHECK_SKIN FAILED: maxdis = ',F8.4, $ i5,' steps since last update') endif if ( result .eq. 0 )return endif lastlist=0 call timer_start(TIME_skin) call save_crds(crd,X(lsavcrd),numatoms) call timer_stop(TIME_skin) #else endif #endif endif issetup = 1 call timer_start(TIME_skin) # ifdef ROWAT c--------------------------------------------------------------------- c ROWAT SETUP c if(ireorderwat.eq.1)then if(verbose.ne.0 .and. master) $ write(6,*)" ----reordering water----" call timer_start(TIME_rowat) if (plevel .ne. 1 .or. NTP .eq. 0)call xdist(v) call xdist(vold) call map_coords(crd,numatoms,X(lfrction),recip) call save_frac_crds(numatoms,X(lsavfrac),X(lfrction)) call setup_grids(nghb,cutlist,reclng,dirlng, $ nucgrd1,nucgrd2,nucgrd3,nghb1,nghb2,nghb3, $ periodic,nogrdptrs,verbose) if ( nucgrd1*nucgrd2*nucgrd3 .gt. nucgmax ) $ call ewald_bomb('ewald_list',' volume of ucell too big!!', $ 'a regular restart should fix things') call fill_tranvec() indtop = nucgrd1*nucgrd2*nucgrd3 inddel = (indtop-1) / numtasks + 1 if ( inddel .eq. 0 )inddel = 1 gindexlo = 1+(mytaskid)*inddel gindexhi = gindexlo+inddel-1 if ( mytaskid .eq. numtasks-1 ) gindexhi = indtop if(trial .or. .not.balance)then inddel = (indtop-1) / numtasks + 1 if ( inddel .eq. 0 )inddel = 1 gindexlo = 1+(mytaskid)*inddel gindexhi = gindexlo+inddel-1 if ( mytaskid .eq. numtasks-1 ) gindexhi = indtop endif c call grid_ucell(numatoms,nucgrd1,nucgrd2,nucgrd3, $ IX(iatmcell),IX(inumatg),IX(iindoff), $ IX(iindatg),X(lfrction),periodic) call get_stack(l_tmpcrd,numatoms*3) call get_stack(l_tmpvel,numatoms*3) call get_stack(l_tmpvel2,numatoms*3) call get_stack(l_tmpxr ,numatoms*3) l_tmpdp=1 if(mpoltype.eq.1)call get_stack(l_tmpdp ,numatoms*6) call get_istack(istart,nucgrd1*nucgrd2*nucgrd3) call get_istack(inext,numatoms) call get_istack(ilast,nucgrd1*nucgrd2*nucgrd3) call get_istack(inwat,nucgrd1*nucgrd2*nucgrd3) call get_istack(itmpro,numatoms) call reorder_wat(IX(irowat),robgwat,roenwat, $ crd,v,vold,xr,x(linddip),x(ldipvel), $ numatoms,nucgrd1,nucgrd2,nucgrd3, $ IX(iatmcell),IX(inumatg),IX(iindoff),IX(iindatg), $ i_stack(istart),i_stack(inext),i_stack(ilast), $ i_stack(inwat),i_stack(itmpro), $ r_stack(l_tmpcrd),r_stack(l_tmpvel),r_stack(l_tmpvel2), $ r_stack(l_tmpxr),r_stack(l_tmpdp), $ r_stack(l_tmpdp+numatoms*3),mpoltype,verbose,master) if(mpoltype.eq.1)call free_stack(l_tmpdp) call free_stack(l_tmpxr) call free_stack(l_tmpvel2) call free_stack(l_tmpvel) call free_stack(l_tmpcrd) call free_istack(itmpro) call free_istack(inwat) call free_istack(ilast) call free_istack(inext) call free_istack(istart) call timer_stop(TIME_rowat) endif c c DONE ROWAT SETUP c--------------------------------------------------------------------- # endif call save_crds(crd,X(lsavcrd),numatoms) call timer_stop(TIME_skin) #ifdef MPI c------------------------------------------------------------------------ c START THE LIST BUILD, FIRST THE LIST GRID SETUP c--------------------------------------------------------------------- c if(i_do_direct)then call MPI_COMM_SIZE(direct_comm,numtasks,ierr) call MPI_COMM_RANK(direct_comm,mytaskid,ierr) #endif call timer_start(TIME_map) call map_coords(crd,numatoms,X(lfrction),recip) call save_frac_crds(numatoms,X(lsavfrac),X(lfrction)) call timer_stop_start(TIME_map,TIME_setgrd) call setup_grids(nghb,cutlist,reclng,dirlng, $ nucgrd1,nucgrd2,nucgrd3,nghb1,nghb2,nghb3, $ periodic,nogrdptrs,verbose) if ( nucgrd1*nucgrd2*nucgrd3 .gt. nucgmax ) $ call ewald_bomb('ewald_list', $ ' volume of ucell too big, too many subcells', $ ' list grid memory needs to be reallocated, restart sander') c c --------- Nonperiodic systems: -------------------------------------- if(periodic.eq.0)then c ----- If system has too few cells for the pointer method c ----- to be efficient, use the no-grid-pointer system c ----- where all forward cells are checked for pair atoms c ----- rather than just the forward cells on the grid-pointer-list c nogrdptrs=nogrdptrs .or.( (nucgrd1.le.2*nghb+2) .or. $ (nucgrd1.le.2*nghb+2) .or. $ (nucgrd1.le.2*nghb+2) ) if(verbose.ge.3)write(6,*)" List using nogrdptrs ",nogrdptrs c ----- Make the subcells about 3 A in size now so that there c ----- are more subcells, easier to load balance. if(nogrdptrs)then if(dirlng(1)/nucgrd1 .gt.3)then ngrd1 = dirlng(1)/3 ngrd2 = dirlng(2)/3 ngrd3 = dirlng(3)/3 if ( verbose .ge. 1)then write(6,'("| New Grids set up for nogrdptrs ")') write(6, '(5X,a,/,5X,i9,1X,i9,1X,i9)') . 'Number of grids per unit cell in x,y,z:', . ngrd1, ngrd2, ngrd3 endif nucgrd1=ngrd1 nucgrd2=ngrd2 nucgrd3=ngrd3 endif endif endif c call fill_tranvec() call timer_stop_start(TIME_setgrd,TIME_grduc) indtop = nucgrd1*nucgrd2*nucgrd3 #ifdef MPI c c--------------------------------------------------------------------- c For trial run of a parallel run, an initial guess at c the distribution of subcells must be made. (trial=.true.) c For a periodic system, the balance is assumed to be c good for evenly dividing the subcells. (balance = .false.) c if(trial .or. .not.balance)then inddel = (indtop-1) / numtasks + 1 if ( inddel .eq. 0 )inddel = 1 gindexlo = 1+(mytaskid)*inddel gindexhi = gindexlo+inddel-1 if ( mytaskid .eq. numtasks-1 ) gindexhi = indtop last_numlist = 0 endif #else c--------------------------------------------------------------------- c ----- for non parallel runs, do all the subcells gindexlo=1 gindexhi=indtop #endif c--------------------------------------------------------------------- c ----- assign atoms to cells (atmcell) and make cell atom list (indatg) call grid_ucell(numatoms,nucgrd1,nucgrd2,nucgrd3, $ IX(iatmcell),IX(inumatg),IX(iindoff),IX(iindatg),X(lfrction) $ ,periodic) c c --------------------------------------------------------------------- if(periodic.eq.0)then c ----- Nonperiodic systems: isiz=max(2,(gindexhi-gindexlo+1)*(maxnptrs+1)) if(nogrdptrs)isiz=1 call get_istack(inghbptr,isiz) isiz=max(2,(gindexhi-gindexlo+1)*(maxnptrs)) if(nogrdptrs)isiz=1 call get_istack(inghtran,isiz) call grid_pointers( $ nucgrd1,nucgrd2,nucgrd3,nghb1,nghb2,nghb3, $ I_stack(inghbptr),maxnptrs,numnptrs, $ I_stack(inghtran),IX(imy_grids), $ indtop,gindexlo,gindexhi,ix(ixtran), $ periodic,nogrdptrs) else c ----- Periodic systems: c only call grid_pointers if the unit cell has changed so much that c nucgrd[123] have changed. Otherwise the grid pointers c do not change. if(nucgrd1.ne.nucgrd1_0 $ .or. nucgrd2.ne.nucgrd2_0 $ .or. nucgrd3.ne.nucgrd3_0)then nucgrd1_0=nucgrd1 nucgrd2_0=nucgrd2 nucgrd3_0=nucgrd3 call grid_pointers( $ nucgrd1,nucgrd2,nucgrd3,nghb1,nghb2,nghb3, $ IX(inghbptr),maxnptrs,numnptrs, $ IX(inghtran),IX(imy_grids), $ indtop,gindexlo,gindexhi,ix(ixtran), $ periodic,nogrdptrs) endif endif c c --------------------------------------------------------------------- c call timer_stop_start(TIME_grduc,TIME_grdim) call grid_image(nucgrd1,nucgrd2,nucgrd3, $ maximage,numimage,IX(inumatg),IX(iindoff), $ IX(iindatg),IX(inumimg), $ IX(inlogrid),IX(inhigrid),X(lfrction), $ X(limgcrds),ucell,IX(ibckptr),IX(iimgind),verbose,periodic $ ,IX(imy_grids) $ ) call timer_stop_start(TIME_grdim,TIME_bldlst) if(balance)then sizgrdprs=indtop else sizgrdprs=2 endif call get_istack(igridpairs,sizgrdprs) if(periodic.eq.0)then call get_nb_list(iac,ico,ntypes,ifail, $ listtot,indtop,maxnblst,numatoms, $ IX(iatmlist),IX(ilist),IX(iscratch), $ IX(iiwa),IX(iiwh), $ ipairs, $ IX(Inumimg),IX(inlogrid),IX(inhigrid), $ numnptrs,maxnptrs,I_stack(inghbptr),nghb1,IX(ibckptr), $ IX(inumvdw),IX(inumhbnd),cutlist, $ X(limgcrds),IX(imask),IX(inummask),IX(imaskptr),verbose, $ IX(iitran),IX(iktran),I_stack(inghtran),tranvec, $ ix(ixtran),IX(imygrdlist),numimage,nucgrd1, $ nucgrd2*nucgrd3, $ gindexlo,gindexhi, $ belly,ibelly,balance,i_stack(igridpairs), $ periodic,nogrdptrs) else call get_nb_list(iac,ico,ntypes,ifail, $ listtot,indtop,maxnblst,numatoms, $ IX(iatmlist),IX(ilist),IX(iscratch), $ IX(iiwa),IX(iiwh), $ ipairs, $ IX(Inumimg),IX(inlogrid),IX(inhigrid), $ numnptrs,maxnptrs,IX(inghbptr),nghb1,IX(ibckptr), $ IX(inumvdw),IX(inumhbnd),cutlist, $ X(limgcrds),IX(imask),IX(inummask),IX(imaskptr),verbose, $ IX(iitran),IX(iktran),IX(inghtran),tranvec, $ ix(ixtran),IX(imygrdlist),numimage,nucgrd1, $ nucgrd2*nucgrd3, $ gindexlo,gindexhi, $ belly,ibelly,balance,i_stack(igridpairs), $ periodic,nogrdptrs) endif call timer_stop(TIME_bldlst) if ( ifail .eq. 1) then write(6, '(5x,a,i10)') 'SIZE OF NONBOND LIST = ', listtot call ewald_bomb('ewald_list','Non bond list overflow!', #ifdef MEM_ALLOC $ 'check MAXPR in locmem.f') #else $ 'check MAXPR in sizes.h') #endif endif if(trial)then if(periodic.ne.0)then call ewald_bomb('get_nb_list', $ ' Cannot do load balance for periodic', $ ' Logic flaw in code. ') endif #ifdef MPI call timer_start(TIME_grdim) call MPI_ALLREDUCE(I_stack(igridpairs), $ I_stack(igridpairs+indtop),indtop, $ MPI_INTEGER, MPI_SUM,MPI_COMM_WORLD,ierr) call fix_grid_balance(gindexlo,gindexhi, $ i_stack(igridpairs+indtop), $ indtop,numtasks,mytaskid, $ listdiffmax) c call free_istack(igridpairs) call free_istack(inghtran) call free_istack(inghbptr) c isiz=max(2,(gindexhi-gindexlo+1)*(maxnptrs+1)) if(nogrdptrs)isiz=1 call get_istack(inghbptr,isiz) isiz=max(2,(gindexhi-gindexlo+1)*(maxnptrs)) if(nogrdptrs)isiz=1 call get_istack(inghtran,isiz) call get_istack(igridpairs,2*nucgrd1*nucgrd2*nucgrd3) c call grid_pointers( $ nucgrd1,nucgrd2,nucgrd3,nghb1,nghb2,nghb3, $ I_stack(inghbptr),maxnptrs,numnptrs, $ I_stack(inghtran),IX(imy_grids), $ indtop,gindexlo,gindexhi,ix(ixtran), $ periodic,nogrdptrs) call grid_image(nucgrd1,nucgrd2,nucgrd3, $ maximage,numimage,IX(inumatg),IX(iindoff), $ IX(iindatg),IX(inumimg), $ IX(inlogrid),IX(inhigrid),X(lfrction), $ X(limgcrds),ucell,IX(ibckptr),IX(iimgind), $ verbose,periodic $ ,IX(imy_grids) $ ) call timer_stop_start(TIME_grdim,TIME_bldlst) call get_nb_list(iac,ico,ntypes,ifail, $ listtot,indtop,maxnblst,numatoms, $ IX(iatmlist),IX(ilist),IX(iscratch), $ IX(iiwa),IX(iiwh), $ ipairs, $ IX(Inumimg),IX(inlogrid),IX(inhigrid), $ numnptrs,maxnptrs,I_stack(inghbptr),nghb1,IX(ibckptr), $ IX(inumvdw),IX(inumhbnd),cutlist, $ X(limgcrds),IX(imask),IX(inummask),IX(imaskptr),verbose, $ IX(iitran),IX(iktran),I_stack(inghtran),tranvec, $ ix(ixtran),IX(imygrdlist),numimage,nucgrd1, $ nucgrd2*nucgrd3, $ gindexlo,gindexhi, $ belly,ibelly,balance,I_stack(igridpairs), $ periodic,nogrdptrs) call timer_stop(TIME_bldlst) #endif trial=.false. last_numlist=listtot newbalance=0 elseif(balance)then c --- If this was not a rebalance step, then check c whether the list has gotten more than a tolerance c away from the last balance. If so, trigger c a new balance by setting trial and newbalance c newbalance will need to be communicated to c all processors before the next list build. listdiff=iabs(last_numlist-listtot) if(listdiff.gt.listdiffmax)then newbalance=2 endif endif #ifdef MPI if(balance .and. (verbose.gt.0) )then if(master) write(6,105) if(master) write(6,106) do i=0,numtasks-1 tmplist(i)=0 enddo tmplist(mytaskid)=listtot call MPI_ALLREDUCE(tmplist(0),alllist(0),numtasks, $ MPI_INTEGER, MPI_SUM,MPI_COMM_WORLD,ierr) c if(verbose.gt.1)then if(mytaskid.eq.0)then write(6,110)(i,alllist(i),i=0,numtasks-1) do i=1,numtasks-1 tmplist(0)=tmplist(0)+alllist(i) enddo write(6,103)tmplist(0) c endif endif 110 format("| ",2i12) 102 format("| ",3i12) 103 format("| Total: ",i12) 105 format("| ",'----------------List Breakdown----------------') 106 format("| ",'list processor listtot') endif #else if(master)then if ( verbose .gt. 0)write(6,*)'listtot = ',listtot endif #endif c call free_istack(igridpairs) if( periodic.eq.0 )then call free_istack(inghtran) call free_istack(inghbptr) endif listtotall=listtot #ifdef MPI if(first)then call MPI_ALLREDUCE(listtot,listtotall,1, $ MPI_INTEGER, MPI_SUM,MPI_COMM_WORLD,ierr) endif call MPI_COMM_RANK(MPI_COMM_WORLD,mytaskid,ierr) call MPI_COMM_SIZE(MPI_COMM_WORLD,numtasks,ierr) endif call MPI_BARRIER(MPI_COMM_WORLD,ierr) #endif if( first .and. master ) then write(6, '(a,i10)') $ '| Local SIZE OF NONBOND LIST = ', listtot write(6, '(a,i10)') $ '| TOTAL SIZE OF NONBOND LIST = ', listtotall end if first = .false. return end c c========================================================================= c----------------------------------------------------- c GET_EE_FUNC c----------------------------------------------------- c subroutine get_ee_func(x,switch,d_switch_dx,ee_type) implicit none _REAL_ x,switch,d_switch_dx integer ee_type _REAL_ pi c c ---get switch function multiplying the Coulomb interaction 1/r c r has been converted to x by x = dxdr*r for convenience c if ( ee_type .eq. 1 )then c c ---erfc function for ewald c #ifdef DPREC call derfcfun(x,switch) #else call erfcfun(x,switch) #endif pi = 3.14159265358979323846d0 d_switch_dx = -2.d0*exp(-x*x)/sqrt(pi) c elseif ( ee_type .eq. 2 )then c c ---force shift cutoff c if ( x .lt. 1.d0 )then switch = (1.d0 - x)**2 d_switch_dx = -2.d0*(1.d0 - x) else switch = 0.d0 d_switch_dx = 0.d0 endif endif return end c---------------------------------------------------------------------------- c c --- GET_NB_ENERGY -- c c---------------------------------------------------------------------------- c ...the main routine for non bond energy (vdw and hbond) c as well as direct part of ewald sum. c It is structured for parallelism... c c---------------------------------------------------------------------------- subroutine get_nb_energy(iac,ico,ntypes,charge, $ cn1,cn2,asol,bsol,force,numatoms, $ indtop,numimg,nlogrid,nhigrid,bckptr, $ numvdw,numhbnd,ipairs,imagcrds, $ ewaldcof,eedtbdns,eed_cub,eed_lin, $ maxnblst,eelt,evdw,ehb,virial,eedvir, $ filter_cut,ee_type,eedmeth,dxdr, $ epol,dipole,field,mpoltype,r_stack,i_stack, $ gindexlo,gindexhi) implicit none #ifdef MPI #include "ew_parallel.h" #include "parallel.h" c$$$ integer tasknum,inddel #endif #include "flocntrl.h" integer l_real_df,l_real_x,l_real_y,l_real_z,l_real_r2,l_int integer numatoms,maxnblst,mpoltype integer iac(*),ico(*),ntypes,indtop,ee_type,eedmeth integer numimg(*),nlogrid(*),nhigrid(*) integer bckptr(*),numvdw(*),numhbnd(*) _REAL_ charge(*),cn1(*),cn2(*),asol(*),bsol(*) _REAL_ imagcrds(3,*),ewaldcof,eedtbdns,dxdr, $ eed_cub(4,*),eed_lin(2,*),virial(3,3) integer ipairs(maxnblst) _REAL_ force(3,numatoms),eelt,epol,evdw,ehb _REAL_ eedvir,filter_cut, $ dipole(3,*),field(3,*) integer gindexlo,gindexhi integer index,numpack,i,k,l,ncell_lo,ncell_hi,ntot,nvdw _REAL_ xk,yk,zk integer i_stack(*) _REAL_ r_stack(*) integer nn c c ---FLOW CONTROL FLAG (debug and future respa) c if ( do_dir .eq. 0 )return eelt = 0.d0 epol = 0.d0 evdw = 0.d0 ehb = 0.d0 eedvir = 0.d0 do l = 1,3 do k = 1,3 virial(k,l) = 0.d0 enddo end do numpack = 1 do index = gindexlo,gindexhi k = index if ( numimg(k) .gt. 0 )then ncell_lo = nlogrid(k) ncell_hi = nhigrid(k) do k = ncell_lo,ncell_hi i = bckptr(k) xk = imagcrds(1,k) yk = imagcrds(2,k) zk = imagcrds(3,k) ntot = numVDW(i) + numHBND(i) nvdw = numVDW(i) if ( ntot .gt. 0 )then call get_stack(l_real_df,ntot) call get_stack(l_real_x,ntot) call get_stack(l_real_y,ntot) call get_stack(l_real_z,ntot) call get_stack(l_real_r2,ntot) call get_istack(l_int,ntot) if ( mpoltype .eq. 0 )then call short_ene(i,xk,yk,zk,ipairs(numpack),ntot,nvdw, $ bckptr,imagcrds,ewaldcof,eedtbdns, $ eed_cub,eed_lin,charge, $ ntypes,iac,ico,cn1,cn2,asol,bsol,filter_cut, $ eelt,evdw,ehb,force,virial,ee_type,eedmeth,dxdr, $ eedvir,r_stack(l_real_df),r_stack(l_real_x) $ ,r_stack(l_real_y),r_stack(l_real_z) $ ,r_stack(l_real_r2),i_stack(l_int) $ ) elseif ( mpoltype .eq. 1 )then call short_ene_dip(i,xk,yk,zk,ipairs(numpack),ntot,nvdw, $ bckptr,imagcrds,ewaldcof,eedtbdns, $ eed_cub,eed_lin,charge,dipole, $ ntypes,iac,ico,cn1,cn2,asol,bsol,filter_cut, $ eelt,epol,evdw,ehb,force,field,virial, $ ee_type,eedmeth,dxdr,eedvir) endif call free_stack(l_real_r2) call free_stack(l_real_z) call free_stack(l_real_y) call free_stack(l_real_x) call free_stack(l_real_df) call free_istack(l_int) numpack = numpack + ntot endif end do endif end do return end c---------------------------------------------------------------------------- c c --- GET_NB_LIST --- ********* c c ...this routine carries out the short range part of nonbond c calculation. Note that I am using a gridding routine, or c geometric hashing with renumbering of atoms to speed list c generation and to promote locality of reference, important c for cache memory usage. The pre-imaging is to avoid expensive c coordinate transforms in the minimum image pair calculations. c I set up pointers between the subcells to speed this process. c NOTE: this comment also refers to routine pack_nb_list(). c c---------------------------------------------------------------------------- subroutine get_nb_list(iac,ico,ntypes,ifail, $ listtot,indtop,maxnblst,numatoms, $ atmlist,list,scratch,iwa,iwh,ipairs, $ numimg,nlogrid,nhigrid,numnptrs,maxnptrs, $ nghbptr,nghb1, bckptr,numvdw,numhbnd,cutoff_list, $ imagcrds,mask,nummask,maskptr,verbose, $ itran,ktran,nghtran, $ tranvec,xtran,mygrdlist,numimage,nucgrd1,nucgrd23, $ gindexlo,gindexhi, $ belly,ibelly,balance,gridpairs,periodic,nogrdptrs) c ---main routine for list building c note it is structured for parallelism c implicit none #include "extra.h" #ifdef MPI # include "ew_parallel.h" # include "parallel.h" # include "mpif.h" #else /* not parallel needs numtasks and mytaskid */ integer numtasks,mytaskid #endif c _REAL_ cutoff_list,imagcrds(3,*) _REAL_ tranvec(*) integer iac(*),ico(*),ntypes,ifail,listtot integer indtop,maxnblst,numatoms,maxnptrs integer atmlist(numatoms), $ list(numatoms),scratch(numatoms), $ iwa(numatoms),iwh(numatoms) $ ,mygrdlist(*),numimage,nucgrd1,nucgrd23 integer xtran(7,10) integer ipairs(maxnblst) integer gindexlo,gindexhi integer ibelly(*) logical belly,balance integer np0,gridpairs(indtop,2) integer numimg(*),nlogrid(*),nhigrid(*) integer bckptr(*),numvdw(*),numhbnd(*),numnptrs,nghb1, $ nghbptr(0:maxnptrs,*),mask(*),maskptr(*),nummask(*), $ verbose C integer itran(*),ktran(*),nghtran(maxnptrs,*) integer numlist,num integer index,index0,index1 integer j,jj,j0,k,ncell_lo,ncell_hi,m,m1,m2 integer i,kk,numpack integer nstart,tranyz,xtindex,jtran,indi,nstop,numindex _REAL_ xk,yk,zk _REAL_ cutoffsq integer periodic logical nogrdptrs #ifndef MPI numtasks=1 mytaskid=0 #endif do i=1,numimage mygrdlist(i)=0 enddo ifail = 0 cutoffsq = cutoff_list*cutoff_list do j = 1,numatoms scratch(j) = 0 end do numpack = 0 # ifdef MPI if(balance)then np0=0 do i=1,indtop gridpairs(i,1)=0 enddo endif # endif do index = gindexlo,gindexhi index0=index-gindexlo+1 if ( numimg(index) .gt. 0 )then ncell_lo = nlogrid(index) ncell_hi = nhigrid(index) numlist = 0 c ========================================================== if(nogrdptrs)then c ========================================================== if(periodic.ne.0) $ call ewald_bomb('get_nb_list', $ 'Cannot run nogrdptrs with ntb=1', $ 'turn off nogrdptrs ') do j0 = index,indtop m1 = nlogrid(j0) m2 = nhigrid(j0) if ( m2 .ge. m1 )then do m = m1,m2 numlist = numlist+1 atmlist(numlist) = m enddo endif end do if ( numlist .gt. 0 )then do k = ncell_lo,ncell_hi kk = k - ncell_lo + 1 i = bckptr(k) xk = imagcrds(1,k) yk = imagcrds(2,k) zk = imagcrds(3,k) mygrdlist(k)=1 call pack_nb_nogrdptrs(kk,i,xk,yk,zk,imagcrds,cutoffsq, $ atmlist,numlist,list,num,numpack,bckptr, $ iac,ico,ntypes,iwa,iwh,ipairs, $ numVDW,numHBND,maxnblst,ifail, $ mask,nummask,maskptr,scratch, $ mygrdlist, $ belly,ibelly) if ( ifail .eq. 1)then listtot = maxnblst return endif end do endif c ========================================================== else ! nogrdptrs if statement c ========================================================== c c -- get list of atoms in the neighborhood of cell(index0) c numindex= numnptrs if(periodic.eq.0) numindex = nghbptr(0,index0) do j0 = 1,numindex index1 = nghbptr(j0,index0) if ( j0 .eq. 1 )then nstart=0 else nstart=-nghb1 endif nstop=nghb1 if(periodic.eq.0)then indi=mod(index1-1,nucgrd1)+1 if(indi.gt.nucgrd1-nghb1)nstop=nucgrd1-indi if(indi.le.nghb1)nstart=1-indi if ( j0 .eq. 1 ) nstart=0 endif xtindex=ishft(nghtran(j0,index0),-8) if(xtindex.gt.10 .or. xtindex.lt.0)then print *,"xtindex BAD: ",xtindex print *," *********** ",mytaskid,index0,index1,j0 print *," *********** ",numindex,gindexlo,gindexhi close(50) call mexit(6,1) endif tranyz = nghtran(j0,index0)-ishft(xtindex,8) do j = nstart,nstop jtran = tranyz+xtran(j-nstart+1,xtindex) jj = index1+j-xtran(j-nstart+1,xtindex)*nucgrd1 m1 = nlogrid(jj) m2 = nhigrid(jj) if ( m2 .ge. m1 )then do m = m1,m2 numlist = numlist+1 atmlist(numlist) = m itran(numlist)=jtran enddo endif end do end do if ( numlist .gt. 0 )then do k = ncell_lo,ncell_hi kk = k - ncell_lo + 1 i = bckptr(k) xk = imagcrds(1,k) yk = imagcrds(2,k) zk = imagcrds(3,k) mygrdlist(k)=1 call pack_nb_list(kk,i,xk,yk,zk,imagcrds,cutoffsq, $ atmlist,numlist,list,num,numpack,bckptr, $ iac,ico,ntypes,iwa,iwh,ipairs, $ numVDW,numHBND,maxnblst,ifail, $ mask,nummask,maskptr,scratch, $ itran,ktran,tranvec,mygrdlist, $ belly,ibelly) if ( ifail .eq. 1)then listtot = maxnblst return endif end do endif c ========================================================== endif ! nogrdptrs if statement c ========================================================== endif # ifdef MPI if(balance)then gridpairs(index,1)=numpack-np0 np0=numpack endif # endif end do listtot = numpack return end c c------------------------------------------------------------------- c --- GRID_IMAGE --- c c ...this routine grids the imaged atoms in the unit cell plus c neighbor cells while building the array of sorted image atoms. c The sorting should improve locality of reference... c subroutine grid_image(nucgrd1,nucgrd2,nucgrd3, $ maximage,numimage,numatg,indoff,indatg, $ numimg,nlogrid,nhigrid,fraction,imagcrds,ucell,bckptr, $ imagind,verbose,periodic $ ,my_grids $ ) implicit none #include "extra.h" #ifdef MPI # include "ew_parallel.h" # include "parallel.h" #endif integer nucgrd1,nucgrd2,nucgrd3 integer maximage,numimage integer numatg(*),indatg(*),indoff(*) integer nlogrid(*),nhigrid(*),imagind(*) integer numimg(*),bckptr(maximage),verbose,periodic _REAL_ imagcrds(3,maximage) _REAL_ fraction(3,*),ucell(3,3) _REAL_ f1,f2,f3,shft integer index,indtop,i,j,n $ ,my_grids(*) integer i0,i1,i2 shft=0.5d0 if(periodic.eq.0)shft=0.d0 indtop = nucgrd1*nucgrd2*nucgrd3 numimage = 0 do index = 1,indtop if(my_grids(index).eq.1)then n = numatg(index) if (numimage + n .gt. maximage ) then c Maximage is only estimated via a density plus fudge factor c see local_nb_get_sizes.f. So check for overflow. c (happens to us with bad c crystal set ups i.e. inappropriate periodic bdries, c or with big cell fluctuations write(6,*)'maximage = ',maximage write(6,*)'numimage = ',numimage call ewald_bomb('grid_image', $ 'num image atoms exceeds maximage!!', $ 'check ewaldlocmem ') endif i0 = indoff(index) i1 = i0 + 1 i2 = i0 + n nlogrid(index) = numimage + 1 nhigrid(index) = numimage + n numimg(index) = n do i = i1,i2 j = indatg(i) f1 = fraction(1,j)+shft f2 = fraction(2,j)+shft f3 = fraction(3,j)+shft numimage = numimage + 1 imagcrds(1,numimage) = f1*ucell(1,1) + f2*ucell(1,2) + $ f3*ucell(1,3) imagcrds(2,numimage) = f1*ucell(2,1) + f2*ucell(2,2) + $ f3*ucell(2,3) imagcrds(3,numimage) = f1*ucell(3,1) + f2*ucell(3,2) + $ f3*ucell(3,3) bckptr(numimage) = j imagind(numimage) = index end do endif end do #ifdef MPI if(master) then #endif if (verbose .eq. 1)write(6,*)'grid_image: numimage = ',numimage #ifdef MPI endif #endif return end c------------------------------------------------------------------- c --- GRID_POINTERS --- c subroutine grid_pointers( $ nucgrd1,nucgrd2,nucgrd3,nghb1,nghb2,nghb3, $ nghbptr,maxnptrs,numnptrs,nghtran,my_grids, $ indtop,gindexlo,gindexhi,xtran, $ periodic,nogrdptrs) implicit none #include "parallel.h" integer nucgrd1,nucgrd2,nucgrd3,nghb1,nghb2,nghb3 integer xtran(7,10) integer maxnptrs,nghbptr(0:maxnptrs,*),numnptrs $ ,nghtran(maxnptrs,*) integer my_grids(*) integer indtop,gindexlo,gindexhi,index0 integer periodic integer i1,i2,i3,j2,j3,i2grd,i3grd,j3grd,j2grd,jj2 integer j2strt,j2stop,jj2strt,jj2stop,j3stop integer index,num integer k3off integer xtindex_1,xtindex_2 integer sizgrd12 integer index0_min,index0_max logical nogrdptrs index0_min = maxnptrs index0_max = 0 sizgrd12=nucgrd1*nucgrd2 do i1=1,indtop my_grids(i1)=0 enddo c ---get forward pointers for unit cell grid c neighbor cells must be later in the subcell list c to get unique atom - imageatom pairing c------------------------------------------------------------------------- c Non-Periodic cell list pointers c Will get ri if(periodic.eq.0)then if(nogrdptrs)then do i1=gindexlo,nucgrd1*nucgrd2*nucgrd3 my_grids(i1)=1 enddo else do i3 = 1,nucgrd3 i3grd=sizgrd12*(i3-1) do i2 = 1,nucgrd2 i2grd=i3grd+nucgrd1*(i2-1) do i1 = 1,nucgrd1 index = i2grd+i1 if(index.lt.gindexlo.or.index.gt.gindexhi)goto 180 my_grids(index)=1 index0=index-gindexlo+1 index0_max = max (index0_max,index0) index0_min = min (index0_min,index0) c c Set up xtran index for this cell c c UNSTABLE till fixed: c Need to make sure that atoms do not drift to edges c Otherwise this simplified xtindex will not work. c xtindex_1 = 1 xtindex_2 = 1 j2strt=-nghb2 if(i2+j2strt.lt.1)j2strt=1-i2 j2stop=nghb2 if(i2+j2stop.gt.nucgrd2)j2stop=nucgrd2-i2 jj2strt=-nghb1 if(i1+jj2strt.lt.1)jj2strt=1-i1 jj2stop=nghb1 if(i1+jj2stop.gt.nucgrd1)jj2stop=nucgrd1-i1 c c first do the plane that this cell is in. c first row of neighbors starts with the cell itself (Cell0) c num = 1 nghbptr(num,index0) = index c c no y or z translate, this cell HAS to be in uc c nghtran(num,index0) = 5+ishft(1,8) do jj2=1,jj2stop my_grids(index+jj2)=1 enddo c c next rows of neighbors are the nghb2 rows of 2*nghb1+1 cells c above. Use the center cell of the row as the pointer c (with the same x position as Cell0) c This way the pointer cell must be in the uc wrt the x direction. c do j2 = 1,j2stop j2grd=index+j2*nucgrd1 if(j2+i2.le.nucgrd2)then num=num+1 nghtran(num,index0)=5+ishft(xtindex_2,8) nghbptr(num,index0) = j2grd do jj2=jj2strt,jj2stop my_grids(j2grd+jj2 $ -nucgrd1*xtran(jj2+1+nghb1,xtindex_2))=1 enddo endif enddo c c Now there are nghb3 planes of (2*nghb1+1)(2*nghb2+1) cells c ahead that are good neighbors. c j3stop=nghb3 if(i3+j3stop.gt.nucgrd3)j3stop=nucgrd3-i3 do j3 = 1,j3stop j3grd=j3*sizgrd12 do j2 = j2strt,j2stop num=num+1 j2grd=index+j3grd+j2*nucgrd1 nghtran(num,index0)= $ 5+ishft(xtindex_2,8) nghbptr(num,index0) = j2grd do jj2=jj2strt,jj2stop my_grids(j2grd+jj2)=1 enddo enddo enddo nghbptr(0,index0) = num 180 continue enddo enddo enddo endif numnptrs = maxnptrs return endif c------------------------------------------------------------------------- do 300 i3 = 1,nucgrd3 i3grd=sizgrd12*(i3-1) do 290 i2 = 1,nucgrd2 i2grd=i3grd+nucgrd1*(i2-1) do 280 i1 = 1,nucgrd1 index = i2grd+i1 if(index.lt.gindexlo.or.index.gt.gindexhi)goto 280 my_grids(index)=1 index0=index-gindexlo+1 index0_max = max (index0_max,index0) index0_min = min (index0_min,index0) c c Set up xtran index for this cell c if(i1+nghb1 .gt.nucgrd1)then xtindex_1 = 1+i1+nghb1-nucgrd1 xtindex_2 = xtindex_1 + 2*nghb1 elseif(i1-nghb1 .lt. 1)then xtindex_1 = 1 xtindex_2 = 2*nghb1+2-i1 else xtindex_1 = 1 xtindex_2 = 1 endif c c first do the plane that this cell is in. c first row of neighbors starts with the cell itself (Cell0) c num = 1 nghbptr(num,index0) = index c c no y or z translate, this cell HAS to be in uc c nghtran(num,index0) = 5+ishft(xtindex_1,8) do jj2=1,nghb1 my_grids(index+jj2 $ -nucgrd1*xtran(jj2+1,xtindex_1))=1 enddo c c next rows of neighbors are the nghb2 rows of 2*nghb1+1 cells c above. Use the center cell of the row as the pointer c (with the same x position as Cell0) c This way the pointer cell must be in the uc wrt the x direction. c do j2 = 1,nghb2 num=num+1 j2grd=index+j2*nucgrd1 if(j2+i2.le.nucgrd2)then nghtran(num,index0)=5+ishft(xtindex_2,8) else j2grd=j2grd-sizgrd12 nghtran(num,index0)=8+ishft(xtindex_2,8) endif nghbptr(num,index0) = j2grd do jj2=-nghb1,nghb1 my_grids(j2grd+jj2 $ -nucgrd1*xtran(jj2+1+nghb1,xtindex_2))=1 enddo enddo c c Now there are nghb3 planes of (2*nghb1+1)(2*nghb2+1) cells c ahead that are good neighbors. c do j3 = 1,nghb3 j3grd=j3*sizgrd12 if(j3+i3.gt.nucgrd3)then j3grd=j3grd-sizgrd12*nucgrd3 k3off=9 else k3off=0 endif do j2 = -nghb2,nghb2 num=num+1 j2grd=index+j3grd+j2*nucgrd1 if(j2+i2.gt.nucgrd2)then nghtran(num,index0)=8+k3off+ishft(xtindex_2,8) j2grd=j2grd-sizgrd12 elseif(j2+i2.lt.1)then nghtran(num,index0)=2+k3off+ishft(xtindex_2,8) j2grd=j2grd+sizgrd12 else nghtran(num,index0)=5+k3off+ishft(xtindex_2,8) endif nghbptr(num,index0) = j2grd do jj2=-nghb1,nghb1 my_grids(j2grd+jj2 $ -nucgrd1*xtran(jj2+1+nghb1,xtindex_2))=1 enddo enddo enddo 280 continue 290 continue 300 continue numnptrs = num return end c------------------------------------------------------------------- c --- GRID_UCELL --- c c ...this routine grids the mapped atoms in the unit cell c into the NUCGRD1 x NUCGRD2 x NUCGRD3 subcells according c to their fractional coordinates. c subroutine grid_ucell(numatoms,nucgrd1,nucgrd2,nucgrd3, $ atmcell,numatg,indoff,indatg,fraction,periodic) implicit none integer numatoms,nucgrd1,nucgrd2,nucgrd3,periodic integer atmcell(*),numatg(*),indoff(*),indatg(*) _REAL_ fraction(3,*) integer i,j,i1,i2,i3,index,indtop integer ichk _REAL_ shft shft=0.5d0 if(periodic.eq.0)shft=0.0 indtop = nucgrd1*nucgrd2*nucgrd3 do index = 1,indtop numatg(index) = 0 end do c----------------------------------------------------------------- c c ---find out which ucgrd subcell each atom is in. c atmcell(i) is the ucgrd subcell that contains atom i c numatg(index) is the number of atoms in the index ucgrd c do i = 1,numatoms c ---the following bombs only seem to occur when atoms fly off to infinity c so to speak due to awful vdw conflicts i1 = (fraction(1,i) + shft) * nucgrd1 + 1 c if ( i1 .le. 0 .or. i1 .gt. nucgrd1 ) c $ write(6,*)'bombs away i1',i,i1 i2 = (fraction(2,i) + shft) * nucgrd2 + 1 c if ( i2 .le. 0 .or. i2 .gt. nucgrd2 ) c $ write(6,*)'bombs away i2',i,i2 i3 = (fraction(3,i) + shft) * nucgrd3 + 1 c if ( i3 .le. 0 .or. i3 .gt. nucgrd3 ) c $ write(6,*)'bombs away i3',i,i3 index = nucgrd1*nucgrd2*(i3-1)+nucgrd1*(i2-1)+i1 atmcell(i) = index numatg(index) = numatg(index) + 1 end do c c ---find the offset of the starting atoms for each ucgrd subcell. c zero the numatg()entries as you go indoff(1) = 0 do i = 2,indtop indoff(i) = indoff(i-1) + numatg(i-1) numatg(i-1) = 0 end do c c ichk = numatoms - indoff(indtop) - numatg(indtop) c this crash also only seems to occur when atoms can not be gridded due to c truly awful coordinates e.g. NaN (We get with really bad input data c sometimes) c c if ( ichk .ne. 0 )then c write(6,*)'BIG PROBLEM in grid_ucell!!!' c call mexit(6,1) c endif c----------------------------------------------------------------- c Fill indatg() as a list of atoms in each subcell such that c the list of atoms in subcell 1 are at the beginning, c subcell 2 list is right after that (starting at indoff(2)+1) c numatg(indtop) = 0 do i = 1,numatoms index = atmcell(i) numatg(index) = numatg(index) + 1 j = numatg(index)+indoff(index) indatg(j) = i end do return end c c c*********************************************************************** c ------------------------------------------------------------------- c --- PACK_NB_LIST --- *** c ------------------------------------------------------------------- c*********************************************************************** subroutine pack_nb_list(kk,i,xk,yk,zk,imagcrds,cutoffsq, $ atmlist,numlist,list,num,numpack,bckptr, $ iac,ico,ntypes,iwa,iwh, $ ipairs, $ numVDW,numHBND,maxnblst,ifail, $ mask,nummask,maskptr,scratch, $ itran,ktran,tranvec,mygrdlist, $ belly,ibelly) implicit none integer numpack integer kk,i,atmlist(*),numlist,list(*),num,bckptr(*), $ iac(*),ico(*),ntypes,iwa(*),iwh(*), $ numVDW(*),numHBND(*), $ maxnblst,ifail $ ,mygrdlist(*) _REAL_ xk,yk,zk,imagcrds(3,*),cutoffsq integer mask(*),nummask(*),maskptr(*),scratch(*) integer ipairs(*),ibelly(*) logical belly,deadi,deadik C #include "parallel.h" _REAL_ dx,dy,dz,r2 integer iaci,ic,index,k,lpr,lhb,m,n,npr integer itran(*),ktran(*),jtran _REAL_ x_tran,y_tran,z_tran,tranvec(3,*) num = 0 m = maskptr(i) lpr = 0 lhb = 0 iaci = ntypes*(iac(i)-1) do n = 1,nummask(i) k = mask(m+n) scratch(k) = i end do deadi=.false. deadi=(ibelly(i).eq.0).and.belly do m = kk+1,numlist jtran=itran(m) c x_tran=tranvec(1,jtran) c y_tran=tranvec(2,jtran) c z_tran=tranvec(3,jtran) x_tran=tranvec(1,itran(m)) y_tran=tranvec(2,itran(m)) z_tran=tranvec(3,itran(m)) n = atmlist(m) k = bckptr(n) deadik=(ibelly(k).eq.0).and.deadi if ( (scratch(k) .ne. i) .and. .not.deadik) then dx = imagcrds(1,n) - xk + x_tran dy = imagcrds(2,n) - yk + y_tran dz = imagcrds(3,n) - zk + z_tran r2 = dx*dx + dy*dy + dz*dz if ( r2 .lt. cutoffsq ) then #ifdef TEST_INPUT if(r2.lt. .5) $ write(6,190)i,k,r2 190 format(" Atoms close:",2i8, $ " r**2 ",f10.6) #endif if (mygrdlist(n).eq.0) then mygrdlist(n)=1 endif num = num + 1 list(num) = n index = iaci+iac(k) ic = ico(index) if(ic.ge.0) then lpr = lpr+1 iwa(lpr) = ior(n,ishft(itran(m),27)) ! bitwise optimization else lhb = lhb+1 iwh(lhb) = ior(n,ishft(itran(m),27)) ! bitwise optimization endif endif endif end do if ( num + numpack .gt. maxnblst )then write(6, '(/,a,i12,i12,a,i12,a,i4)') . ' * NB pairs ', num,numpack, . ' exceeds capacity (',maxnblst,')', mytaskid ifail=1 return endif c ---now put all pairs into iwa do k = 1,lhb iwa(lpr+k) = iwh(k) end do npr = lpr+lhb numVDW(i) = lpr numHBND(i) = lhb do k = 1,npr numpack = numpack+1 ipairs(numpack) = iwa(k) c write(21,*)numpack,ishft(iwa(k),-27),iwa(k)-ishft(jtran,27) end do return end c*********************************************************************** c ------------------------------------------------------------------- c --- PACK_NB_NOGRDPTRS --- *** c ------------------------------------------------------------------- c*********************************************************************** subroutine pack_nb_nogrdptrs(kk,i,xk,yk,zk,imagcrds,cutoffsq, $ atmlist,numlist,list,num,numpack,bckptr, $ iac,ico,ntypes,iwa,iwh, $ ipairs, $ numVDW,numHBND,maxnblst,ifail, $ mask,nummask,maskptr,scratch, $ mygrdlist, $ belly,ibelly) implicit none integer numpack integer kk,i,atmlist(*),numlist,list(*),num,bckptr(*), $ iac(*),ico(*),ntypes,iwa(*),iwh(*), $ numVDW(*),numHBND(*), $ maxnblst,ifail $ ,mygrdlist(*) _REAL_ xk,yk,zk,imagcrds(3,*),cutoffsq integer mask(*),nummask(*),maskptr(*),scratch(*) integer ipairs(*),ibelly(*) logical belly,deadi,deadik integer jtran C #include "parallel.h" _REAL_ dx,dy,dz,r2 integer iaci,ic,index,k,lpr,lhb,m,n,npr num = 0 m = maskptr(i) lpr = 0 lhb = 0 iaci = ntypes*(iac(i)-1) do n = 1,nummask(i) k = mask(m+n) scratch(k) = i end do deadi=.false. deadi=(ibelly(i).eq.0).and. belly do m = kk+1,numlist jtran=5 n = atmlist(m) k = bckptr(n) deadik=(ibelly(k).eq.0).and.deadi if ( (scratch(k) .ne. i) .and. .not.deadik) then dx = imagcrds(1,n) - xk dy = imagcrds(2,n) - yk dz = imagcrds(3,n) - zk r2 = dx*dx + dy*dy + dz*dz if ( r2 .lt. cutoffsq ) then #ifdef TEST_INPUT if(r2.lt. .5) $ write(6,190)i,k,r2 190 format(" Atoms close:",2i8, $ " r**2 ",f10.6) #endif if (mygrdlist(n).eq.0) then mygrdlist(n)=1 endif num = num + 1 list(num) = n index = iaci+iac(k) ic = ico(index) if(ic.ge.0) then lpr = lpr+1 iwa(lpr) = ior(n,ishft(5,27)) ! bitwise optimization else lhb = lhb+1 iwh(lhb) = ior(n,ishft(5,27)) ! bitwise optimization endif endif endif end do if ( num + numpack .gt. maxnblst )then write(6, '(/,a,i12,i12,a,i12,a,i4)') . ' * NB pairs ', num,numpack, . ' exceeds capacity (',maxnblst,')', mytaskid ifail=1 return endif c ---now put all pairs into iwa do k = 1,lhb iwa(lpr+k) = iwh(k) end do npr = lpr+lhb numVDW(i) = lpr numHBND(i) = lhb do k = 1,npr numpack = numpack+1 ipairs(numpack) = iwa(k) end do return end c c--------------------------------------------------------------------- c --- SAVE_CRDS --- c this is needed for the skin test for buffered pairlists c coords are compared with this to see if anybody moved too far c-------------------------------------------------------------------- c subroutine save_crds(crd,savcrd,numatoms) implicit none integer numatoms _REAL_ crd(3,numatoms),savcrd(3,numatoms) integer n do 100 n = 1,numatoms savcrd(1,n) = crd(1,n) savcrd(2,n) = crd(2,n) savcrd(3,n) = crd(3,n) 100 continue return end c c------------------------------------------------------------------- c --- SAVE_FRAC_CRDS --- c ...this is needed in case you are wrapping coords in the box; c note, td actually runs ewald without wrapping since it is easier c to analyze but it should work equally well, either way. c subroutine save_frac_crds(numatoms,savfrac,fraction) implicit none integer numatoms _REAL_ savfrac(3,*), fraction(3,*) integer i do i = 1,numatoms savfrac(1,i) = fraction(1,i) savfrac(2,i) = fraction(2,i) savfrac(3,i) = fraction(3,i) end do return end c c------------------------------------------------------------------- c --- SHORT_ENE --- c-------------------------------------------------------------------------- c short_ene subroutine modified by Nathalie Godbout, sgi 04/26/00. c subroutine short_ene(i,xk,yk,zk,ipairs,numtot,numvdw, $ bckptr,imagcrds,ewaldcof,eedtbdns, $ eed_cub,eed_lin,charge, $ ntypes,iac,ico,cn1,cn2,asol,bsol,filter_cut, $ eelt,evdw,ehb,frc,virial, $ ee_type,eedmeth,dxdr,eedvir, $ cache_df,cache_x,cache_y,cache_z,cache_r2,cache_bckptr $ ) implicit none _REAL_ xk,yk,zk,imagcrds(3,*) integer i,numvdw,numtot,bckptr(*) integer ipairs(*),ee_type,eedmeth _REAL_ ewaldcof,eed_cub(*),eed_lin(2,*), $ charge(*),virial(3,3),eedvir _REAL_ eedtbdns,filter_cut,dxdr integer ntypes,iac(*),ico(*) _REAL_ cn1(*),cn2(*),asol(*),bsol(*), $ eelt,evdw,ehb,frc(3,*) integer ic,j,m,n,ind,iaci _REAL_ del,delrinv _REAL_ switch,d_switch_dx _REAL_ B0,B1 _REAL_ ee_vir_iso _REAL_ pi,third,half,filter_cut2,xx _REAL_ comm1 _REAL_ xktran(3,18) _REAL_ e3dx,e4dx integer mask27 integer nprefetch #ifdef LES # include "les.h" #endif #include "ew_nbe.h" #include "ew_parallel.h" #include "parallel.h" #include "ew_tran.h" #include "ew_cntrl.h" integer itran c c new variables. _REAL_ cache_df(*),cache_x(*),cache_y(*) _REAL_ cache_z(*),cache_r2(*) integer im_new,icount integer cache_bckptr(*) c pi = 3.14159265358979323846d0 third=1.d0/3.d0 half = 0.5d0 vxx = 0.d0 vxy = 0.d0 vxz = 0.d0 vyy = 0.d0 vyz = 0.d0 vzz = 0.d0 ee_vir_iso = 0.d0 del = 1.d0 / eedtbdns dumx = 0.d0 dumy = 0.d0 dumz = 0.d0 filter_cut2 = filter_cut*filter_cut cgi = charge(i) iaci = ntypes * (iac(i) - 1) c mask27 = 2**27 - 1 do m=1,18 xktran(1,m) = tranvec(1,m) - xk xktran(2,m) = tranvec(2,m) - yk xktran(3,m) = tranvec(3,m) - zk enddo #ifdef LES lestmp=nlesty*(lestyp(i)-1) #endif c c The "eedmeth" decision is unrolled here, since this provides c significant efficiency improvements, in spite of making the code c more ugly.... c if ( eedmeth .eq. 1 )then c c------------------------------------------------------------------------------ c Loop over the 12-6 LJ terms for eedmeth = 1 c------------------------------------------------------------------------------ c icount = 0 do m = 1,numvdw # include "ew_directp.h" enddo c c calculation starts: loop over the data gathered in the temporary c array caches. c #ifdef MASSLIB call vrsqrt( cache_df, cache_r2, icount ) #else do im_new = 1,icount delr2 = cache_r2(im_new) delrinv = 1.0/sqrt(delr2) cache_df(im_new) = delrinv enddo #endif C*$* NO FUSION do im_new = 1,icount j = cache_bckptr(im_new) delr2 = cache_r2(im_new) delrinv = cache_df(im_new) c c -- cubic spline on switch: c delr = delr2*delrinv delr2inv = delrinv*delrinv x = dxdr*delr ind = eedtbdns*x dx = x - ind*del ind = 4*ind e3dx = dx*eed_cub(3+ind) e4dx = dx*dx*eed_cub(4+ind) switch = eed_cub(1+ind) + dx*(eed_cub(2+ind) + $ (e3dx + e4dx*third)*half) d_switch_dx = eed_cub(2+ind) + e3dx+ e4dx*half c c ---TD Got the idea for B_l from Walter Smith's CCP5 article 1982 c Ewald for point multipoles c B0 = switch*delr*delr2inv B1 = (B0 - d_switch_dx*dxdr)*delr2inv #ifdef LES if (use_pme.ne.0) then c c ---if we are using PME, then the correction for lfac will c be done after the reciprocal space calculation is done, c so no need for it here c comm1 = cgi*charge(j) else lfac=lesfac(lestmp+lestyp(j)) comm1 = cgi*charge(j)*lfac endif #else comm1 = cgi*charge(j) #endif c eelt = eelt + comm1*B0 dfee = comm1*B1 ee_vir_iso = ee_vir_iso - dfee*delr2 cache_r2(im_new)=delr2inv cache_df(im_new)=dfee enddo c epilogue # include "ew_directe.h" c c------------------------------------------------------------------------------ c Now loop over the 12-10 LJ terms for eedmeth = 1 c------------------------------------------------------------------------------ c icount = 0 do m = numvdw+1,numtot # include "ew_directp.h" enddo c #ifdef MASSLIB call vrsqrt( cache_df, cache_r2, icount ) #else do im_new = 1,icount delr2 = cache_r2(im_new) delrinv = 1.0/sqrt(delr2) cache_df(im_new) = delrinv enddo #endif C*$* NO FUSION do im_new = 1, icount j = cache_bckptr(im_new) delr2 = cache_r2(im_new) delrinv = cache_df(im_new) c c -- cubic spline on switch: c delr = delr2*delrinv delr2inv = delrinv*delrinv x = dxdr*delr ind = eedtbdns*x dx = x - ind*del ind = 4*ind e3dx = dx*eed_cub(3+ind) e4dx = dx*dx*eed_cub(4+ind) switch = eed_cub(1+ind) + dx*(eed_cub(2+ind) + $ (e3dx + e4dx*third)*half) d_switch_dx = eed_cub(2+ind) + e3dx+ e4dx*half c c ---TD Got the idea for B_l from Walter Smith's CCP5 article 1982 c Ewald for point multipoles c B0 = switch*delr*delr2inv B1 = (B0 - d_switch_dx*dxdr)*delr2inv #ifdef LES if (use_pme.ne.0) then c c ---if we are using PME, then the correction for lfac will c be done after the reciprocal space calculation is done, c so no need for it here c comm1 = cgi*charge(j) else lfac=lesfac(lestmp+lestyp(j)) comm1 = cgi*charge(j)*lfac endif #else comm1 = cgi*charge(j) #endif c eelt = eelt + comm1*B0 dfee = comm1*B1 ee_vir_iso = ee_vir_iso - dfee*delr2 c cache_r2(im_new)=delr2inv cache_df(im_new)=dfee enddo # include "ew_directe2.h" elseif ( eedmeth .eq. 2 )then c------------------------------------------------------------------------------ c Loop over the 12-6 LJ terms for eedmeth = 2 c------------------------------------------------------------------------------ c icount = 0 do m = 1,numvdw # include "ew_directp.h" enddo c c calculation starts: loop over the data gathered in the temporary c array caches. c #ifdef MASSLIB call vrsqrt( cache_df, cache_r2, icount ) #else do im_new = 1,icount delr2 = cache_r2(im_new) delrinv = 1.0/sqrt(delr2) cache_df(im_new) = delrinv enddo #endif C*$* NO FUSION do im_new = 1,icount j = cache_bckptr(im_new) delr2 = cache_r2(im_new) delrinv = cache_df(im_new) c c -- linear lookup on switch: c delr = delr2*delrinv delr2inv = delrinv*delrinv x = dxdr*delr xx = eedtbdns*x + 1 ind = xx dx = xx - ind switch = (1.d0 - dx)*eed_lin(1,ind) + $ dx*eed_lin(1,ind+1) d_switch_dx = (1.d0 - dx)*eed_lin(2,ind) + $ dx*eed_lin(2,ind+1) B0 = switch*delrinv B1 = (B0 - d_switch_dx*dxdr)*delr2inv #ifdef LES if (use_pme.ne.0) then c c ---if we are using PME, then the correction for lfac will c be done after the reciprocal space calculation is done, c so no need for it here c comm1 = cgi*charge(j) else lfac=lesfac(lestmp+lestyp(j)) comm1 = cgi*charge(j)*lfac endif #else comm1 = cgi*charge(j) #endif c eelt = eelt + comm1*B0 dfee = comm1*B1 ee_vir_iso = ee_vir_iso - dfee*delr2 c cache_r2(im_new)=delr2inv cache_df(im_new)=dfee enddo c epilogue # include "ew_directe.h" c c------------------------------------------------------------------------------ c Now loop over the 12-10 LJ terms for eedmeth = 2 c------------------------------------------------------------------------------ c icount = 0 do m = numvdw+1,numtot # include "ew_directp.h" enddo c #ifdef MASSLIB call vrsqrt( cache_df, cache_r2, icount ) #else do im_new = 1,icount delr2 = cache_r2(im_new) delrinv = 1.0/sqrt(delr2) cache_df(im_new) = delrinv enddo #endif C*$* NO FUSION do im_new = 1, icount j = cache_bckptr(im_new) delr2 = cache_r2(im_new) delrinv = cache_df(im_new) c c -- linear lookup on switch: c delr = delr2*delrinv delr2inv = delrinv*delrinv x = dxdr*delr xx = eedtbdns*x + 1 ind = xx dx = xx - ind switch = (1.d0 - dx)*eed_lin(1,ind) + $ dx*eed_lin(1,ind+1) d_switch_dx = (1.d0 - dx)*eed_lin(2,ind) + $ dx*eed_lin(2,ind+1) B0 = switch*delrinv B1 = (B0 - d_switch_dx*dxdr)*delr2inv #ifdef LES if (use_pme.ne.0) then c c ---if we are using PME, then the correction for lfac will c be done after the reciprocal space calculation is done, c so no need for it here c comm1 = cgi*charge(j) else lfac=lesfac(lestmp+lestyp(j)) comm1 = cgi*charge(j)*lfac endif #else comm1 = cgi*charge(j) #endif c eelt = eelt + comm1*B0 dfee = comm1*B1 ee_vir_iso = ee_vir_iso - dfee*delr2 c cache_r2(im_new)=delr2inv cache_df(im_new)=dfee enddo # include "ew_directe2.h" elseif ( eedmeth .eq. 3 )then c------------------------------------------------------------------------------ c Loop over the 12-6 LJ terms for eedmeth = 3 c------------------------------------------------------------------------------ c icount = 0 do m = 1,numvdw # include "ew_directp.h" enddo c c calculation starts: loop over the data gathered in the temporary c array caches. c #ifdef MASSLIB call vrsqrt( cache_df, cache_r2, icount ) #else do im_new = 1,icount delr2 = cache_r2(im_new) delrinv = 1.0/sqrt(delr2) cache_df(im_new) = delrinv enddo #endif C*$* NO FUSION do im_new = 1,icount j = cache_bckptr(im_new) delr2 = cache_r2(im_new) delrinv = cache_df(im_new) c c -- explicit function call: c delr = delr2*delrinv delr2inv = delrinv*delrinv x = dxdr*delr call get_ee_func(x,switch,d_switch_dx,ee_type) c B0 = switch*delrinv B1 = (B0 - d_switch_dx*dxdr)*delr2inv #ifdef LES if (use_pme.ne.0) then c c ---if we are using PME, then the correction for lfac will c be done after the reciprocal space calculation is done, c so no need for it here c comm1 = cgi*charge(j) else lfac=lesfac(lestmp+lestyp(j)) comm1 = cgi*charge(j)*lfac endif #else comm1 = cgi*charge(j) #endif c eelt = eelt + comm1*B0 dfee = comm1*B1 ee_vir_iso = ee_vir_iso - dfee*delr2 c cache_r2(im_new)=delr2inv cache_df(im_new)=dfee enddo c epilogue # include "ew_directe.h" c c------------------------------------------------------------------------------ c Now loop over the 12-10 LJ terms for eedmeth = 3 c------------------------------------------------------------------------------ c icount = 0 do m = numvdw+1,numtot # include "ew_directp.h" enddo c #ifdef MASSLIB call vrsqrt( cache_df, cache_r2, icount ) #else do im_new = 1,icount delr2 = cache_r2(im_new) delrinv = 1.0/sqrt(delr2) cache_df(im_new) = delrinv enddo #endif C*$* NO FUSION do im_new = 1, icount j = cache_bckptr(im_new) delr2 = cache_r2(im_new) delrinv = cache_df(im_new) c c -- explicit function call: c delr = delr2*delrinv delr2inv = delrinv*delrinv x = dxdr*delr call get_ee_func(x,switch,d_switch_dx,ee_type) c B0 = switch*delrinv B1 = (B0 - d_switch_dx*dxdr)*delr2inv #ifdef LES if (use_pme.ne.0) then c c ---if we are using PME, then the correction for lfac will c be done after the reciprocal space calculation is done, c so no need for it here c comm1 = cgi*charge(j) else lfac=lesfac(lestmp+lestyp(j)) comm1 = cgi*charge(j)*lfac endif #else comm1 = cgi*charge(j) #endif c eelt = eelt + comm1*B0 dfee = comm1*B1 ee_vir_iso = ee_vir_iso - dfee*delr2 c cache_r2(im_new)=delr2inv cache_df(im_new)=dfee enddo # include "ew_directe2.h" elseif ( eedmeth .eq. 4 )then c------------------------------------------------------------------------------ c Loop over the 12-6 LJ terms for eedmeth = 4 c------------------------------------------------------------------------------ c icount = 0 do m = 1,numvdw # include "ew_directp.h" enddo c c calculation starts: loop over the data gathered in the temporary c array caches. c #ifdef MASSLIB call vrsqrt( cache_df, cache_r2, icount ) #else do im_new = 1,icount delr2 = cache_r2(im_new) delrinv = 1.0/sqrt(delr2) cache_df(im_new) = delrinv enddo #endif C*$* NO FUSION do im_new = 1,icount j = cache_bckptr(im_new) delr2 = cache_r2(im_new) delrinv = cache_df(im_new) c c -- don't use a switch: c -- straight Coulomb c delr2inv = delrinv*delrinv #ifdef LES if (use_pme.ne.0) then B0 = cgi*charge(j)*delrinv else lfac=lesfac(lestmp+lestyp(j)) B0 = cgi*charge(j)*lfac*delrinv endif #else B0 = cgi*charge(j)*delrinv #endif eelt = eelt + B0 dfee = B0*delr2inv c cache_r2(im_new)=delr2inv cache_df(im_new)=dfee enddo c epilogue # include "ew_directe.h" c c------------------------------------------------------------------------------ c Now loop over the 12-10 LJ terms for eedmeth = 4 c------------------------------------------------------------------------------ c icount = 0 do m = numvdw+1,numtot # include "ew_directp.h" enddo c #ifdef MASSLIB call vrsqrt( cache_df, cache_r2, icount ) #else do im_new = 1,icount delr2 = cache_r2(im_new) delrinv = 1.0/sqrt(delr2) cache_df(im_new) = delrinv enddo #endif C*$* NO FUSION do im_new = 1, icount j = cache_bckptr(im_new) delr2 = cache_r2(im_new) delrinv = cache_df(im_new) c c -- don't use a switch: c -- straight Coulomb c delr2inv = delrinv*delrinv #ifdef LES if (use_pme.ne.0) then B0 = cgi*charge(j)*delrinv else lfac=lesfac(lestmp+lestyp(j)) B0 = cgi*charge(j)*lfac*delrinv endif #else B0 = cgi*charge(j)*delrinv #endif eelt = eelt + B0 dfee = B0*delr2inv c cache_r2(im_new)=delr2inv cache_df(im_new)=dfee enddo # include "ew_directe2.h" elseif ( eedmeth .eq. 5 )then c------------------------------------------------------------------------------ c Loop over the 12-6 LJ terms for eedmeth = 5 c------------------------------------------------------------------------------ c icount = 0 do m = 1,numvdw # include "ew_directp.h" enddo c c calculation starts: loop over the data gathered in the temporary c array caches. c #ifdef MASSLIB call vrec( cache_df, cache_r2, icount ) #else do im_new = 1,icount delr2 = cache_r2(im_new) delr2inv = 1.d0/delr2 cache_df(im_new) = delr2inv enddo #endif C*$* NO FUSION do im_new = 1,icount j = cache_bckptr(im_new) delr2 = cache_r2(im_new) c c -- use dielectric of 1/r: c delr2inv = cache_df(im_new) c #ifdef LES if (use_pme.ne.0) then B0 = cgi*charge(j)*delr2inv else lfac=lesfac(lestmp+lestyp(j)) B0 = cgi*charge(j)*lfac*delr2inv endif #else B0 = cgi*charge(j)*delr2inv #endif eelt = eelt + B0 dfee = 2.d0*B0*delr2inv cache_r2(im_new)=delr2inv cache_df(im_new)=dfee enddo c epilogue # include "ew_directe.h" c c------------------------------------------------------------------------------ c Now loop over the 12-10 LJ terms for eedmeth = 5 c------------------------------------------------------------------------------ c icount = 0 do m = numvdw+1,numtot # include "ew_directp.h" enddo c #ifdef MASSLIB call vrec ( cache_df, cache_r2, icount ) #else do im_new = 1,icount delr2 = cache_r2(im_new) delr2inv = 1.d0/delr2 cache_df(im_new) = delr2inv enddo #endif C*$* NO FUSION do im_new = 1, icount j = cache_bckptr(im_new) delr2 = cache_r2(im_new) c c -- use dielectric of 1/r: c delr2inv = cache_df(im_new) #ifdef LES if (use_pme.ne.0) then B0 = cgi*charge(j)*delr2inv else lfac=lesfac(lestmp+lestyp(j)) B0 = cgi*charge(j)*lfac*delr2inv endif #else B0 = cgi*charge(j)*delr2inv #endif eelt = eelt + B0 dfee = 2.d0*B0*delr2inv cache_r2(im_new)=delr2inv cache_df(im_new)=dfee enddo # include "ew_directe2.h" endif frc(1,i) = frc(1,i) - dumx frc(2,i) = frc(2,i) - dumy frc(3,i) = frc(3,i) - dumz virial(1,1) = virial(1,1) + vxx virial(1,2) = virial(1,2) + vxy virial(2,1) = virial(2,1) + vxy virial(1,3) = virial(1,3) + vxz virial(3,1) = virial(3,1) + vxz virial(2,2) = virial(2,2) + vyy virial(2,3) = virial(2,3) + vyz virial(3,2) = virial(3,2) + vyz virial(3,3) = virial(3,3) + vzz eedvir = eedvir + ee_vir_iso return end c c----------------------------------------------------- c---- SHORT_ENE_DIP c----------------------------------------------------- c subroutine short_ene_dip(i,xk,yk,zk,ipairs,numtot,numvdw, $ bckptr,imagcrds,ewaldcof,eedtbdns, $ eed_cub,eed_lin,charge,dipole, $ ntypes,iac,ico,cn1,cn2,asol,bsol,filter_cut, $ eelt,epol,evdw,ehb,frc,field,virial, $ ee_type,eedmeth,dxdr,eedvir) implicit none _REAL_ xk,yk,zk,imagcrds(3,*) integer i,numvdw,numtot,bckptr(*) integer ipairs(*),ee_type,eedmeth _REAL_ ewaldcof,eed_cub(4,*),eed_lin(2,*), $ charge(*),dipole(3,*),virial(3,3),eedvir _REAL_ eedtbdns,filter_cut,dxdr integer ntypes,iac(*),ico(*) _REAL_ cn1(*),cn2(*),asol(*),bsol(*), $ eelt,epol,evdw,ehb,frc(3,*),field(3,*) integer ic,j,m,n,ind,iaci _REAL_ del _REAL_ switch,d_switch_dx _REAL_ ee_vir_iso _REAL_ edx,edy,edz _REAL_ B0,B1,B2,B3,fac,dotir,dotjr,dotij,fact _REAL_ dphii_dx,dphii_dy,dphii_dz, $ dphij_dx,dphij_dy,dphij_dz _REAL_ term,term0,term1,termi,termj,cgj _REAL_ pi,third,half,filter_cut2,xx _REAL_ xktran(3,18) integer mask27 #ifdef LES # include "les.h" #endif #include "ew_nbe.h" # include "ew_parallel.h" # include "parallel.h" # include "ew_tran.h" integer itran c pi = 3.14159265358979323846d0 third=1.d0/3.d0 half = 0.5d0 fac = 2.d0*ewaldcof*ewaldcof ee_vir_iso = 0.d0 del = 1.d0 / eedtbdns dumx = 0.d0 dumy = 0.d0 dumz = 0.d0 edx = 0.d0 edy = 0.d0 edz = 0.d0 filter_cut2 = filter_cut*filter_cut cgi = charge(i) iaci = ntypes * (iac(i) - 1) mask27 = 2**27 - 1 do m=1,18 xktran(1,m) = tranvec(1,m) - xk xktran(2,m) = tranvec(2,m) - yk xktran(3,m) = tranvec(3,m) - zk enddo #ifdef LES lestmp=nlesty*(lestyp(i)-1) #endif do m = 1,numvdw n = ipairs(m) itran=ishft(n,-27) n = iand(n,mask27) j = bckptr(n) delx = imagcrds(1,n) + xktran(1,itran) dely = imagcrds(2,n) + xktran(2,itran) delz = imagcrds(3,n) + xktran(3,itran) delr2 = delx*delx + dely*dely+delz*delz if ( delr2 .lt. filter_cut2 )then delr = sqrt(delr2) delr2inv = 1.d0/delr2 x = dxdr*delr cgj = charge(j) if ( eedmeth .eq. 1 )then c c -- cubic spline on switch c ind = eedtbdns*x + 1 dx = x - (ind-1.d0)*del switch = eed_cub(1,ind)+dx*(eed_cub(2,ind)+ $ dx*(eed_cub(3,ind)+dx*eed_cub(4,ind)*third)*0.5d0) d_switch_dx = eed_cub(2,ind)+dx*(eed_cub(3,ind)+ $ dx*eed_cub(4,ind)*half) c elseif ( eedmeth .eq. 2 )then c c ---linear lookup on switch, deriv c xx = eedtbdns*x + 1 ind = xx dx = xx - ind switch = (1.d0 - dx)*eed_lin(1,ind) + $ dx*eed_lin(1,ind+1) d_switch_dx = (1.d0 - dx)*eed_lin(2,ind) + $ dx*eed_lin(2,ind+1) c elseif ( eedmeth .eq. 3 )then c c ---direct function call: c call get_ee_func(x,switch,d_switch_dx,ee_type) c else if ( eedmeth.eq.4 ) then c c ---use un-modified Coulomb interaction, no switch c switch = 1.d0 d_switch_dx = 0.d0 c else c write(6,*) 'bad eedmeth in ew_short_dip: ',eedmeth call mexit( 6,1 ) c endif c c TD Got the idea for B_l from Walter Smith's CCP5 article 1982 c Ewald for point multipoles c B_l satisfies grad_i B_l(|r_j - r_i|) = (r_j - r_i)B_{l+1}(|r_j - r_i|) c grad_j B_l(|r_j - r_i|) = -grad_i B_l(|r_j - r_i|) c B0 = switch*delr*delr2inv fact = d_switch_dx*dxdr B1 = (B0 - fact)*delr2inv fact = fac*fact B2 = (3.d0*B1 - fact)*delr2inv fact = fac*fact B3 = (5.d0*B2 - fact)*delr2inv c c B1 = (B0 - d_switch_dx*dxdr)*delr2inv c B2 = (3.d0*B1 - fac*ewaldcof*d_switch_dx)*delr2inv c B3 = (5.d0*B2 - fac*fac*ewaldcof*d_switch_dx)*delr2inv c c epol = dip_i dot grad_i of phii c phii is direct sum electrostatic potential at i due to j c so phii = cgj*B0 + dipj dot gradj of B0 = cgj*B0 - dotjr*B1 c dphii_dx etc are derivatives with respect to r_i c phij is direct sum electrostatic potential at j due to i c dphij_dx etc are derivatives with respect to r_j c dotjr = dipole(1,j)*delx+dipole(2,j)*dely+dipole(3,j)*delz dotir = dipole(1,i)*delx+dipole(2,i)*dely+dipole(3,i)*delz dotij = dipole(1,i)*dipole(1,j)+dipole(2,i)*dipole(2,j)+ $ dipole(3,i)*dipole(3,j) c c gradi phii = cgj*rij*B1 + dipj*B1 - dotjr*rij*B2 c so epol = -cgi*dotjr*B1 + (cgj*B1 - dotjr*B2)*dotir + dotij*B1 c eelt = eelt + cgi*cgj*B0 term = cgj*dotir-cgi*dotjr+dotij epol = epol + term*B1 - dotir*dotjr*B2 term0 = cgi*cgj*B0 + term*B1 - dotir*dotjr*B2 c c so ene = ene + term0; dfx = dterm0_dx etc c grad term0 = term1*rij + B1*grad_i term - grad_i dotir*dotjr*B2 c grad_i term = -cgj*dip_i + cgi*dip_j c grad_i dotir = -dip_i; similar for dotjr c grad_i term0 = term1*rij + (-cgj*B1+dotjr*B2)*dip_i + (cgi*B1+dotir*B2)*dip_j c term1 = cgi*cgj*B1 + term*B2 - dotir*dotjr*B3 termi = cgi*B1+dotir*B2 termj = cgj*B1-dotjr*B2 dfx = term1*delx + termi*dipole(1,j) - termj*dipole(1,i) dfy = term1*dely + termi*dipole(2,j) - termj*dipole(2,i) dfz = term1*delz + termi*dipole(3,j) - termj*dipole(3,i) ee_vir_iso = ee_vir_iso - dfx*delx - dfy*dely - dfz*delz ic = ico(iaci+iac(j)) r6 = delr2inv*delr2inv*delr2inv #ifdef LES lfac=lesfac(lestmp+lestyp(j)) f6 = cn2(ic)*r6*lfac f12 = cn1(ic)*(r6*r6)*lfac #else f6 = cn2(ic)*r6 f12 = cn1(ic)*(r6*r6) #endif evdw = evdw + f12 - f6 c c ---force related quantities c df = (12.d0*f12 - 6.d0*f6)*delr2inv dfx = dfx + df*delx dfy = dfy + df*dely dfz = dfz + df*delz virial(1,1) = virial(1,1) - dfx*delx virial(1,2) = virial(1,2) - dfx*dely virial(1,3) = virial(1,3) - dfx*delz virial(2,1) = virial(2,1) - dfy*delx virial(2,2) = virial(2,2) - dfy*dely virial(2,3) = virial(2,3) - dfy*delz virial(3,1) = virial(3,1) - dfz*delx virial(3,2) = virial(3,2) - dfz*dely virial(3,3) = virial(3,3) - dfz*delz frc(1,j) = frc(1,j) + dfx frc(2,j) = frc(2,j) + dfy frc(3,j) = frc(3,j) + dfz dumx = dumx + dfx dumy = dumy + dfy dumz = dumz + dfz c c ---field related quantities c dphii_dx = termj*delx + B1*dipole(1,j) dphii_dy = termj*dely + B1*dipole(2,j) dphii_dz = termj*delz + B1*dipole(3,j) dphij_dx = -termi*delx + B1*dipole(1,i) dphij_dy = -termi*dely + B1*dipole(2,i) dphij_dz = -termi*delz + B1*dipole(3,i) edx = edx + dphii_dx edy = edy + dphii_dy edz = edz + dphii_dz field(1,j) = field(1,j) - dphij_dx field(2,j) = field(2,j) - dphij_dy field(3,j) = field(3,j) - dphij_dz endif end do c do m = numvdw+1,numtot n = ipairs(m) itran=ishft(n,-27) n = iand(n,mask27) j = bckptr(n) delx = imagcrds(1,n) + xktran(1,itran) dely = imagcrds(2,n) + xktran(2,itran) delz = imagcrds(3,n) + xktran(3,itran) delr2 = delx*delx + dely*dely+delz*delz if ( delr2 .lt. filter_cut2 )then delr = sqrt(delr2) delr2inv = 1.d0/delr2 x = dxdr*delr cgj = charge(j) if ( eedmeth .eq. 1 )then c c -- cubic spline on switch c ind = eedtbdns*x + 1 dx = x - (ind-1.d0)*del switch = eed_cub(1,ind)+dx*(eed_cub(2,ind)+ $ dx*(eed_cub(3,ind)+dx*eed_cub(4,ind)*third)*0.5d0) d_switch_dx = eed_cub(2,ind)+dx*(eed_cub(3,ind)+ $ dx*eed_cub(4,ind)*half) c elseif ( eedmeth .eq. 2 )then c c ---linear lookup on switch, deriv c xx = eedtbdns*x + 1 ind = xx dx = xx - ind switch = (1.d0 - dx)*eed_lin(1,ind) + $ dx*eed_lin(1,ind+1) d_switch_dx = (1.d0 - dx)*eed_lin(2,ind) + $ dx*eed_lin(2,ind+1) c elseif ( eedmeth .eq. 3 )then c c ---direct function call: c call get_ee_func(x,switch,d_switch_dx,ee_type) c else if ( eedmeth.eq.4 ) then c c ---use un-modified Coulomb interaction, no switch c switch = 1.d0 d_switch_dx = 0.d0 c else c write(6,*) 'bad eedmeth in ew_short_dip: ',eedmeth call mexit( 6,1 ) c endif c B0 = switch*delr*delr2inv fact = d_switch_dx*dxdr B1 = (B0 - fact)*delr2inv fact = fac*fact B2 = (3.d0*B1 - fact)*delr2inv fact = fac*fact B3 = (5.d0*B2 - fact)*delr2inv c B0 = switch*delr*delr2inv c B1 = (B0 - d_switch_dx*dxdr)*delr2inv c B2 = (3.d0*B1 - fac*ewaldcof*d_switch_dx)*delr2inv c B3 = (5.d0*B2 - fac*fac*ewaldcof*d_switch_dx)*delr2inv c dotjr = dipole(1,j)*delx+dipole(2,j)*dely+dipole(3,j)*delz dotir = dipole(1,i)*delx+dipole(2,i)*dely+dipole(3,i)*delz dotij = dipole(1,i)*dipole(1,j)+dipole(2,i)*dipole(2,j)+ $ dipole(3,i)*dipole(3,j) eelt = eelt + cgi*cgj*B0 term = cgj*dotir-cgi*dotjr+dotij epol = epol + term*B1 - dotir*dotjr*B2 term0 = cgi*cgj*B0 + term*B1 - dotir*dotjr*B2 term1 = cgi*cgj*B1 + term*B2 - dotir*dotjr*B3 termi = cgi*B1+dotir*B2 termj = cgj*B1-dotjr*B2 dfx = (term1)*delx + termi*dipole(1,j) - termj*dipole(1,i) dfy = (term1)*dely + termi*dipole(2,j) - termj*dipole(2,i) dfz = (term1)*delz + termi*dipole(3,j) - termj*dipole(3,i) ee_vir_iso = ee_vir_iso - dfx*delx - dfy*dely - dfz*delz #ifdef HAS_10_12 c c --- this code allows 10-12 terms; in many (most?) (all?) cases, the c only "nominal" 10-12 terms are on waters, where the asol and bsol c parameters are always zero; hence we can skip this part. c ic = -ico(iaci+iac(j)) r10 = delr2inv*delr2inv*delr2inv*delr2inv*delr2inv f10 = bsol(ic)*r10 f12 = asol(ic)*(r10*delr2inv) ehb = ehb + f12 - f10 df = (12.d0*f12 - 10.d0*f10)*delr2inv #else df = 0.d0 #endif c c ---force related quantities c dfx = dfx + df*delx dfy = dfy + df*dely dfz = dfz + df*delz virial(1,1) = virial(1,1) - dfx*delx virial(1,2) = virial(1,2) - dfx*dely virial(1,3) = virial(1,3) - dfx*delz virial(2,1) = virial(2,1) - dfy*delx virial(2,2) = virial(2,2) - dfy*dely virial(2,3) = virial(2,3) - dfy*delz virial(3,1) = virial(3,1) - dfz*delx virial(3,2) = virial(3,2) - dfz*dely virial(3,3) = virial(3,3) - dfz*delz frc(1,j) = frc(1,j) + dfx frc(2,j) = frc(2,j) + dfy frc(3,j) = frc(3,j) + dfz dumx = dumx + dfx dumy = dumy + dfy dumz = dumz + dfz c c ---field related quantities c dphii_dx = termj*delx + B1*dipole(1,j) dphii_dy = termj*dely + B1*dipole(2,j) dphii_dz = termj*delz + B1*dipole(3,j) dphij_dx = -termi*delx + B1*dipole(1,i) dphij_dy = -termi*dely + B1*dipole(2,i) dphij_dz = -termi*delz + B1*dipole(3,i) edx = edx + dphii_dx edy = edy + dphii_dy edz = edz + dphii_dz field(1,j) = field(1,j) - dphij_dx field(2,j) = field(2,j) - dphij_dy field(3,j) = field(3,j) - dphij_dz endif end do frc(1,i) = frc(1,i) - dumx frc(2,i) = frc(2,i) - dumy frc(3,i) = frc(3,i) - dumz field(1,i) = field(1,i) - edx field(2,i) = field(2,i) - edy field(3,i) = field(3,i) - edz eedvir = eedvir + ee_vir_iso return end c----------------------------------------------------------------- c FIX_GRID_BALANCE c----------------------------------------------------------------- subroutine fix_grid_balance(gindexlo,gindexhi,gridpairs, $ indtop,numtasks,mytaskid,listdiffmax) implicit none integer gindexlo,gindexhi,gridpairs(*) integer indtop,listtot,numtasks,mytaskid,listdiffmax integer i,j,n,lsum,i0,ishare n=0 lsum=0 gindexlo=0 gindexhi=0 listtot=0 do i=1,indtop listtot=listtot+gridpairs(i) enddo i0=1 ishare=listtot/numtasks c c ------ set trigger for new balance as 10 percent of list size c listdiffmax=max(1000,ishare/10) c c ------ give cells to each processor .lt.mytaskid till c they have ishare, then give the next c cells to this pe till it has its share, then return c do i=1,indtop lsum=lsum+gridpairs(i) if(lsum.ge.ishare)then if(n.eq.mytaskid)then gindexlo=i0 gindexhi=i return endif lsum=0 i0=i+1 n=n+1 endif enddo if(n.eq.mytaskid)then gindexlo=i0 gindexhi=indtop return endif gindexlo=indtop return end