Warning: no access to tty (Bad file descriptor). Thus no job control in this shell. stdin: is not a tty 15:22:26 up 1 day, 5:56, 0 users, load average: 2.21, 1.05, 0.55 set verbose qstat -f $PBS_JOBID qstat -f 765032 Job Id: 765032 Job_Name = jac9999multinodecudampi.ruby Job_Owner = scott@ruby01.osc.edu job_state = R queue = parallel server = ruby-batch.osc.edu:15001 Account_Name = PZS0530 Checkpoint = u ctime = Tue Mar 21 15:19:40 2017 Error_Path = ruby01.osc.edu:/users/me/scott/workshops/compchem/amber/jac99 99multinodecudampi.ruby.e765032 exec_host = r0214/0-19+r0218/0-19 exec_gpus = r0214-gpu/0+r0218-gpu/0 Hold_Types = n Join_Path = oe Keep_Files = n Mail_Points = ae Mail_Users = scott@osc.edu mtime = Tue Mar 21 15:22:25 2017 Output_Path = ruby01.osc.edu:/users/me/scott/workshops/compchem/amber/jac9 999multinodecudampi.ruby.o765032 Priority = 0 qtime = Tue Mar 21 15:19:40 2017 Rerunable = True Resource_List.feature = partgpu Resource_List.gattr = me Resource_List.nodect = 2 Resource_List.nodes = 2:ppn=20:gpus=1 Resource_List.walltime = 00:10:00 session_id = 0 Shell_Path_List = /bin/csh substate = 42 Variable_List = PBS_O_QUEUE=batch,PBS_O_HOME=/users/me/scott, PBS_O_LOGNAME=scott, PBS_O_PATH=.:/users/me/scott/bin:/users/me/scott/bin/valgrind/2.4.0/b in:.:/users/me/scott/bin:/usr/lib64/qt-3.3/bin:/usr/local/mvapich2/int el/16.0/2.2/bin:/usr/local/gcc/4.8.5/bin:/usr/local/intel/vtune_amplif ier_xe_2016.3.0.463186/bin64:/usr/local/intel/inspector_xe_2016.1.3.46 0803/bin64:/usr/local/intel/compilers_and_libraries_2016.3.210/linux/b in/intel64:/usr/local/moab/9.0.3-1485382218_a2ab513-el6/bin:/usr/local /torque/6.0.3-1485300822_19e79ad/bin:/usr/local/bin:/bin:/usr/bin:/opt /ibutils/bin:/nfs/10/scott/cc/casino/CASINO/bin_qmc:/usr/local/bin:/usr/ sbin:/usr/gnu/bin:/usr/common/bin/netpbm:/usr/java/bin, PBS_O_MAIL=/var/spool/mail/scott,PBS_O_SHELL=/bin/tcsh,PBS_O_LANG=en_US, PBS_O_SUBMIT_FILTER=/usr/local/sbin/torque_submitfilter, PBS_O_WORKDIR=/users/me/scott/workshops/compchem/amber, PBS_O_HOST=ruby01.osc.edu,PBS_O_SERVER=ruby-batch.osc.edu euser = scott egroup = me queue_type = E etime = Tue Mar 21 15:19:40 2017 submit_args = jac9999multinodecudampi.ruby.pbs.test start_time = Tue Mar 21 15:22:25 2017 Walltime.Remaining = 599 start_count = 1 fault_tolerant = False job_radix = 0 submit_host = ruby01.osc.edu init_work_dir = /users/me/scott/workshops/compchem/amber gpu_flags = 0 request_version = 1 req_information.task_count.0 = 2 req_information.lprocs.0 = 20 req_information.gpus.0 = 1 req_information.thread_usage_policy.0 = allowthreads req_information.hostlist.0 = r0214:ppn=20 req_information.hostlist.0 = r0218:ppn=20 req_information.task_usage.0.task.0.cpu_list = 0-19 req_information.task_usage.0.task.0.mem_list = 0-1 req_information.task_usage.0.task.0.cores = 0 req_information.task_usage.0.task.0.threads = 20 req_information.task_usage.0.task.0.host = r0214 req_information.task_usage.0.task.1.cpu_list = 0-19 req_information.task_usage.0.task.1.mem_list = 0-1 req_information.task_usage.0.task.1.cores = 0 req_information.task_usage.0.task.1.threads = 20 req_information.task_usage.0.task.1.host = r0218 cpuset_string = r0214:0-19+r0218:0-19 memset_string = r0214:0-1+r0218:0-1 module list eval `$LMOD_CMD csh list` /usr/local/lmod/6.0.1/libexec/lmod csh list Currently Loaded Modules: 1) torque/6.0.3-1485300822_19e79ad 4) cxx11/4.8.5 7) amber/16 2) moab/9.0.3-1485382218_a2ab513-el6 5) mvapich2/2.2 8) cuda/8.0.44 3) intel/16.0.3 6) modules/au2016 echo "AMBERHOME=$AMBERHOME" echo AMBERHOME=/usr/local/amber/amber16 AMBERHOME=/usr/local/amber/amber16 echo "PBS_O_WORKDIR=$PBS_O_WORKDIR" echo PBS_O_WORKDIR=/users/me/scott/workshops/compchem/amber PBS_O_WORKDIR=/users/me/scott/workshops/compchem/amber cd $TMPDIR cd /tmp/pbstmp.765032 set MDIN=mdin9999 set MDIN=mdin9999 set MDOUT=mdout9999 set MDOUT=mdout9999 set MDINFO=mdinfo set MDINFO=mdinfo set PRMTOP=prmtop set PRMTOP=prmtop set INPCRD=inpcrd.equil set INPCRD=inpcrd.equil set REFC=refc set REFC=refc set MDCRD=mdcrd set MDCRD=mdcrd set MDVEL=mdvel set MDVEL=mdvel set MDEN=mden set MDEN=mden set RESTRT=restrt set RESTRT=restrt cp -p $PBS_O_WORKDIR/$MDIN . cp -p /users/me/scott/workshops/compchem/amber/mdin9999 . cp -p $PBS_O_WORKDIR/$PRMTOP . cp -p /users/me/scott/workshops/compchem/amber/prmtop . cp -p $PBS_O_WORKDIR/$INPCRD . cp -p /users/me/scott/workshops/compchem/amber/inpcrd.equil . cp -p $PBS_O_WORKDIR/$REFC . cp -p /users/me/scott/workshops/compchem/amber/refc . cp: cannot stat `/users/me/scott/workshops/compchem/amber/refc': No such file or directory pdsh nvidia-smi pdsh nvidia-smi r0214: Tue Mar 21 15:22:28 2017 r0214: +-----------------------------------------------------------------------------+ r0214: | NVIDIA-SMI 375.39 Driver Version: 375.39 | r0214: |-------------------------------+----------------------+----------------------+ r0214: | GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC | r0214: | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | r0214: |===============================+======================+======================| r0214: | 0 Tesla K40m Off | 0000:08:00.0 Off | 0 | r0214: | N/A 38C P8 21W / 235W | 2MiB / 11439MiB | 0% Default | r0214: +-------------------------------+----------------------+----------------------+ r0214: r0214: +-----------------------------------------------------------------------------+ r0214: | Processes: GPU Memory | r0214: | GPU PID Type Process name Usage | r0214: |=============================================================================| r0214: | No running processes found | r0214: +-----------------------------------------------------------------------------+ r0218: Tue Mar 21 15:22:29 2017 r0218: +-----------------------------------------------------------------------------+ r0218: | NVIDIA-SMI 375.39 Driver Version: 375.39 | r0218: |-------------------------------+----------------------+----------------------+ r0218: | GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC | r0218: | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | r0218: |===============================+======================+======================| r0218: | 0 Tesla K40m Off | 0000:08:00.0 Off | 0 | r0218: | N/A 26C P8 19W / 235W | 2MiB / 11439MiB | 0% Default | r0218: +-------------------------------+----------------------+----------------------+ r0218: r0218: +-----------------------------------------------------------------------------+ r0218: | Processes: GPU Memory | r0218: | GPU PID Type Process name Usage | r0218: |=============================================================================| r0218: | No running processes found | r0218: +-----------------------------------------------------------------------------+ set ngpus=`cat $PBS_GPUFILE|wc -l` set ngpus=`cat $PBS_GPUFILE|wc -l` wc -l cat /var/spool/batch/torque_ruby/aux//765032gpu cat $PBS_GPUFILE cat /var/spool/batch/torque_ruby/aux//765032gpu r0214-gpu0 r0218-gpu0 mpiexec -n $ngpus pmemd.cuda.MPI -i $MDIN -o $MDOUT -inf $MDINFO -p $PRMTOP -c $INPCRD -ref $REFC -x $MDCRD -v $MDVEL -e $MDEN -r $RESTRT mpiexec -n 2 pmemd.cuda.MPI -i mdin9999 -o mdout9999 -inf mdinfo -p prmtop -c inpcrd.equil -ref refc -x mdcrd -v mdvel -e mden -r restrt gpu_allreduce cudaDeviceSynchronize failed an illegal memory access was encountered =================================================================================== = BAD TERMINATION OF ONE OF YOUR meICATION PROCESSES = PID 30819 RUNNING AT r0214 = EXIT CODE: 255 = CLEANING UP REMAINING PROCESSES = YOU CAN IGNORE THE BELOW CLEANUP MESSAGES =================================================================================== ls -al ls -al total 4580 drwx------ 2 scott me 4096 Mar 21 15:22 . drwxrwxrwt 9 root root 135168 Mar 21 15:22 .. -rw-r--r-- 1 scott amber 1719909 Mar 15 2002 inpcrd.equil -rw------- 1 scott me 259 Mar 21 15:22 logfile -rw-r--r-- 1 scott amber 239 Jul 20 2016 mdin9999 -rw------- 1 scott me 1033 Mar 21 15:22 mdinfo -rw------- 1 scott me 13680 Mar 21 15:22 mdout9999 -rw-r--r-- 1 scott amber 2795715 Mar 15 2002 prmtop cp -p $MDOUT $MDINFO $MDCRD $MDVEL $MDEN $RESTRT $PBS_O_WORKDIR cp -p mdout9999 mdinfo mdcrd mdvel mden restrt /users/me/scott/workshops/compchem/amber cp: cannot stat `mdcrd': No such file or directory cp: cannot stat `mdvel': No such file or directory cp: cannot stat `mden': No such file or directory cp: cannot stat `restrt': No such file or directory cat $MDOUT cat mdout9999 ------------------------------------------------------- Amber 16 PMEMD 2016 ------------------------------------------------------- | PMEMD implementation of SANDER, Release 16 | Run on 03/21/2017 at 15:22:32 | Executable path: pmemd.cuda.MPI | Working directory: /tmp/pbstmp.765032 | Hostname: r0214.osc.edu File Assignments: | MDIN: mdin9999 | MDOUT: mdout9999 | INPCRD: inpcrd.equil | PARM: prmtop | RESTRT: restrt | REFC: refc | MDVEL: mdvel | MDEN: mden | MDCRD: mdcrd | MDINFO: mdinfo |LOGFILE: logfile | MDFRC: mdfrc Here is the input file: short md, nve ensemble &cntrl ntx=7, irest=1, ntc=2, ntf=2, tol=0.0000001, nstlim=9999, ntpr=1000, ntwr=10000, dt=0.001, cut=9., ntt=0, temp0=300., &end &ewald nfft1=64,nfft2=64,nfft3=64, skinnb=2., &end Note: ig = -1. Setting random seed to 207874 based on wallclock time in microseconds and disabling the synchronization of random numbers between tasks to improve performance. |--------------------- INFORMATION ---------------------- | GPU (CUDA) Version of PMEMD in use: NVIDIA GPU IN USE. | Version 16.0.0 | | 02/25/2016 | | Implementation by: | Ross C. Walker (SDSC) | Scott Le Grand (nVIDIA) | | Precision model in use: | [SPFP] - Single Precision Forces, 64-bit Fixed Point | Accumulation. (Default) | |-------------------------------------------------------- |----------------- CITATION INFORMATION ----------------- | | When publishing work that utilized the CUDA version | of AMBER, please cite the following in addition to | the regular AMBER citations: | | - Romelia Salomon-Ferrer; Andreas W. Goetz; Duncan | Poole; Scott Le Grand; Ross C. Walker "Routine | microsecond molecular dynamics simulations with | AMBER - Part II: Particle Mesh Ewald", J. Chem. | Theory Comput., 2013, 9 (9), pp3878-3888, | DOI: 10.1021/ct400314y. | | - Andreas W. Goetz; Mark J. Williamson; Dong Xu; | Duncan Poole; Scott Le Grand; Ross C. Walker | "Routine microsecond molecular dynamics simulations | with AMBER - Part I: Generalized Born", J. Chem. | Theory Comput., 2012, 8 (5), pp1542-1555. | | - Scott Le Grand; Andreas W. Goetz; Ross C. Walker | "SPFP: Speed without compromise - a mixed precision | model for GPU accelerated molecular dynamics | simulations.", Comp. Phys. Comm., 2013, 184 | pp374-380, DOI: 10.1016/j.cpc.2012.09.022 | |-------------------------------------------------------- |------------------- GPU DEVICE INFO -------------------- | | Task ID: 0 | CUDA_VISIBLE_DEVICES: not set | CUDA Capable Devices Detected: 1 | CUDA Device ID in use: 0 | CUDA Device Name: Tesla K40m | CUDA Device Global Mem Size: 11439 MB | CUDA Device Num Multiprocessors: 15 | CUDA Device Core Freq: 0.75 GHz | | | Task ID: 1 | CUDA_VISIBLE_DEVICES: not set | CUDA Capable Devices Detected: 1 | CUDA Device ID in use: 0 | CUDA Device Name: Tesla K40m | CUDA Device Global Mem Size: 11439 MB | CUDA Device Num Multiprocessors: 15 | CUDA Device Core Freq: 0.75 GHz | |-------------------------------------------------------- |---------------- GPU PEER TO PEER INFO ----------------- | | Peer to Peer support: ENABLED | |-------------------------------------------------------- | Conditional Compilation Defines Used: | MPI | PUBFFT | BINTRAJ | MKL | CUDA | EMIL | Largest sphere to fit in unit cell has radius = 31.115 | INFO: Old style PARM file read | Note: 1-4 EEL scale factors were NOT found in the topology file. | Using default value of 1.2. | Note: 1-4 VDW scale factors were NOT found in the topology file. | Using default value of 2.0. | Duplicated 0 dihedrals | Duplicated 0 dihedrals -------------------------------------------------------------------------------- 1. RESOURCE USE: -------------------------------------------------------------------------------- getting new box info from bottom of inpcrd NATOM = 23558 NTYPES = 16 NBONH = 22290 MBONA = 1302 NTHETH = 2789 MTHETA = 1772 NPHIH = 5026 MPHIA = 3140 NHPARM = 0 NPARM = 0 NNB = 41859 NRES = 7182 NBONA = 1302 NTHETA = 1772 NPHIA = 3140 NUMBND = 45 NUMANG = 91 NPTRA = 33 NATYP = 31 NPHB = 1 IFBOX = 1 NMXRS = 25 IFCAP = 0 NEXTRA = 0 NCOPY = 0 | Coordinate Index Table dimensions: 11 11 11 | Direct force subcell size = 5.6573 5.6573 5.6573 BOX TYPE: RECTILINEAR -------------------------------------------------------------------------------- 2. CONTROL DATA FOR THE RUN -------------------------------------------------------------------------------- General flags: imin = 0, nmropt = 0 Nature and format of input: ntx = 7, irest = 1, ntrx = 1 Nature and format of output: ntxo = 2, ntpr = 1000, ntrx = 1, ntwr = 10000 iwrap = 0, ntwx = 0, ntwv = 0, ntwe = 0 ioutfm = 1, ntwprt = 0, idecomp = 0, rbornstat= 0 Potential function: ntf = 2, ntb = 1, igb = 0, nsnb = 25 ipol = 0, gbsa = 0, iesp = 0 dielc = 1.00000, cut = 9.00000, intdiel = 1.00000 Frozen or restrained atoms: ibelly = 0, ntr = 0 Molecular dynamics: nstlim = 9999, nscm = 1000, nrespa = 1 t = 0.00000, dt = 0.00100, vlimit = -1.00000 SHAKE: ntc = 2, jfastw = 0 tol = 0.00000 | Intermolecular bonds treatment: | no_intermolecular_bonds = 1 | Energy averages sample interval: | ene_avg_sampling = 1000 Ewald parameters: verbose = 0, ew_type = 0, nbflag = 1, use_pme = 1 vdwmeth = 1, eedmeth = 1, netfrc = 1 Box X = 62.230 Box Y = 62.230 Box Z = 62.230 Alpha = 90.000 Beta = 90.000 Gamma = 90.000 NFFT1 = 64 NFFT2 = 64 NFFT3 = 64 Cutoff= 9.000 Tol =0.100E-04 Ewald Coefficient = 0.30768 Interpolation order = 4 | PMEMD ewald parallel performance parameters: | block_fft = 0 | fft_blk_y_divisor = 2 | excl_recip = 0 | excl_master = 0 | atm_redist_freq = 320 -------------------------------------------------------------------------------- 3. ATOMIC COORDINATES AND VELOCITIES -------------------------------------------------------------------------------- begin time read from input coords = 6.000 ps Number of triangulated 3-point waters found: 7023 Sum of charges from parm topology file = -11.00000006 Assuming uniform neutralizing plasma | Dynamic Memory, Types Used: | Reals 1006341 | Integers 1058213 | Nonbonded Pairs Initial Allocation: 3562853 | GPU memory information (estimate): | KB of GPU memory in use: 40656 | KB of CPU memory in use: 30450 | Running AMBER/MPI version on 2 MPI task -------------------------------------------------------------------------------- 4. RESULTS -------------------------------------------------------------------------------- --------------------------------------------------- APPROXIMATING switch and d/dx switch using CUBIC SPLINE INTERPOLATION using 5000.0 points per unit in tabled values TESTING RELATIVE ERROR over r ranging from 0.0 to cutoff | CHECK switch(x): max rel err = 0.2738E-14 at 2.422500 | CHECK d/dx switch(x): max rel err = 0.8314E-11 at 2.736960 --------------------------------------------------- |--------------------------------------------------- | APPROXIMATING direct energy using CUBIC SPLINE INTERPOLATION | with 50.0 points per unit in tabled values | Relative Error Limit not exceeded for r .gt. 2.39 | APPROXIMATING direct force using CUBIC SPLINE INTERPOLATION | with 50.0 points per unit in tabled values | Relative Error Limit not exceeded for r .gt. 2.84 |--------------------------------------------------- check COM velocity, temp: 0.003908 0.02(Removed) NSTEP = 1000 TIME(PS) = 7.000 TEMP(K) = 295.39 PRESS = 0.0 Etot = -58174.0232 EKtot = 14199.9375 EPtot = -72373.9607 BOND = 442.9978 ANGLE = 1159.4913 DIHED = 1002.5295 1-4 NB = 563.9211 1-4 EEL = 6606.8879 VDWAALS = 8371.0644 EELEC = -90520.8528 EHBOND = 0.0000 RESTRAINT = 0.0000 ------------------------------------------------------------------------------ check COM velocity, temp: 0.000012 0.00(Removed) NSTEP = 2000 TIME(PS) = 8.000 TEMP(K) = 295.86 PRESS = 0.0 Etot = -58174.1719 EKtot = 14222.3418 EPtot = -72396.5137 BOND = 426.6080 ANGLE = 1162.4957 DIHED = 964.9336 1-4 NB = 547.5774 1-4 EEL = 6626.2328 VDWAALS = 8402.0387 EELEC = -90526.3998 EHBOND = 0.0000 RESTRAINT = 0.0000 ------------------------------------------------------------------------------ check COM velocity, temp: 0.000014 0.00(Removed) NSTEP = 3000 TIME(PS) = 9.000 TEMP(K) = 299.72 PRESS = 0.0 Etot = -58175.8628 EKtot = 14407.7988 EPtot = -72583.6616 BOND = 446.4758 ANGLE = 1148.2248 DIHED = 984.9923 1-4 NB = 555.6727 1-4 EEL = 6630.1631 VDWAALS = 8484.4650 EELEC = -90833.6554 EHBOND = 0.0000 RESTRAINT = 0.0000 ------------------------------------------------------------------------------ check COM velocity, temp: 0.000009 0.00(Removed) NSTEP = 4000 TIME(PS) = 10.000 TEMP(K) = 297.22 PRESS = 0.0 Etot = -58175.0533 EKtot = 14287.8418 EPtot = -72462.8951 BOND = 434.9183 ANGLE = 1182.0080 DIHED = 980.7278 1-4 NB = 557.5086 1-4 EEL = 6646.3185 VDWAALS = 8272.5564 EELEC = -90536.9327 EHBOND = 0.0000 RESTRAINT = 0.0000 ------------------------------------------------------------------------------ check COM velocity, temp: 0.000010 0.00(Removed) NSTEP = 5000 TIME(PS) = 11.000 TEMP(K) = 297.79 PRESS = 0.0 Etot = -58175.9374 EKtot = 14315.1260 EPtot = -72491.0634 BOND = 437.9008 ANGLE = 1191.1034 DIHED = 995.7533 1-4 NB = 547.0980 1-4 EEL = 6576.5090 VDWAALS = 8514.5624 EELEC = -90753.9902 EHBOND = 0.0000 RESTRAINT = 0.0000 ------------------------------------------------------------------------------ check COM velocity, temp: 0.000005 0.00(Removed) NSTEP = 6000 TIME(PS) = 12.000 TEMP(K) = 299.56 PRESS = 0.0 Etot = -58175.7858 EKtot = 14399.9746 EPtot = -72575.7604 BOND = 423.7906 ANGLE = 1217.8038 DIHED = 1003.9381 1-4 NB = 540.0709 1-4 EEL = 6635.4437 VDWAALS = 8326.7171 EELEC = -90723.5247 EHBOND = 0.0000 RESTRAINT = 0.0000 ------------------------------------------------------------------------------ ----------------------- Resources requested: nodes=2:ppn=20:gpus=1 ----------------------- Resources used: cput=00:00:34 walltime=00:00:28 mem=0.559 GB vmem=173.589 GB ----------------------- Resource units charged (estimate): 0.031 RUs -----------------------