I'm glad you've been having success... I hope some of it will rub off here. I've got a couple of different VMs on ESX 3.5 which are trying to run Java applications of one sort or another, and we've been seeing serious performance issues; the application just seems to stop.
Guest OS: Debian GNU/Linux 5.0 (Linux kernel 2.6.29)
Guest RAM: 4G or 8G, depending on the application
Physical server: HP BL680c, 4x quad-core Xeon, 64GB RAM.
Storage: HP EVA8100
Application 1: Lucene web search engine. The indexer process just seems to grind to a halt. The machine is not doing any significant I/O, or using any CPU. Running strace on the Java indexer process reveals a lot of activity, almost all of which is the futex() system call, plus the occasional sendto():
futex(0x41695e58, FUTEX_WAKE_PRIVATE, 1) = 0
gettimeofday({1245245763, 884153}, NULL) = 0
gettimeofday({1245245763, 884203}, NULL) = 0
clock_gettime(CLOCK_REALTIME, {1245245763, 884239046}) = 0
futex(0x7f63fdb30114, FUTEX_WAIT_PRIVATE, 1, {0, 49963954}) = -1 ETIMEDOUT (Connection timed out)
futex(0x41695e58, FUTEX_WAKE_PRIVATE, 1) = 0
gettimeofday({1245245763, 935999}, NULL) = 0
gettimeofday({1245245763, 936041}, NULL) = 0
clock_gettime(CLOCK_REALTIME, {1245245763, 936076493}) = 0
futex(0x7f63fdb30114, FUTEX_WAIT_PRIVATE, 1, {0, 49964507}) = -1 ETIMEDOUT (Connection timed out)
futex(0x41695e58, FUTEX_WAKE_PRIVATE, 1) = 0
gettimeofday({1245245763, 987932}, NULL) = 0
gettimeofday({1245245763, 987973}, NULL) = 0
clock_gettime(CLOCK_REALTIME, {1245245763, 988008086}) = 0
futex(0x7f63fdb30114, FUTEX_WAIT_PRIVATE, 1, {0, 49964914}) = -1 ETIMEDOUT (Connection timed out)
futex(0x41695e58, FUTEX_WAKE_PRIVATE, 1) = 0
gettimeofday({1245245764, 39930}, NULL) = 0
gettimeofday({1245245764, 39971}, NULL) = 0
clock_gettime(CLOCK_REALTIME, {1245245764, 40006447}) = 0
futex(0x7f63fdb30114, FUTEX_WAIT_PRIVATE, 1, {0, 49964553}) = -1 ETIMEDOUT (Connection timed out)
futex(0x41695e58, FUTEX_WAKE_PRIVATE, 1) = 0
gettimeofday({1245245764, 90476}, NULL) = 0
gettimeofday({1245245764, 90522}, NULL) = 0
clock_gettime(CLOCK_REALTIME, {1245245764, 90557966}) = 0
futex(0x7f63fdb30114, FUTEX_WAIT_PRIVATE, 1, {0, 49964034}) = -1 ETIMEDOUT (Connection timed out)
futex(0x41695e58, FUTEX_WAKE_PRIVATE, 1) = 0
gettimeofday({1245245764, 140834}, NULL) = 0
gettimeofday({1245245764, 140876}, NULL) = 0
clock_gettime(CLOCK_REALTIME, {1245245764, 140911970}) = 0
futex(0x7f63fdb30114, FUTEX_WAIT_PRIVATE, 1, {0, 49964030}) = -1 ETIMEDOUT (Connection timed out)
futex(0x41695e58, FUTEX_WAKE_PRIVATE, 1) = 0
gettimeofday({1245245764, 191503}, NULL) = 0
gettimeofday({1245245764, 191543}, NULL) = 0
clock_gettime(CLOCK_REALTIME, {1245245764, 191579147}) = 0
futex(0x7f63fdb30114, FUTEX_WAIT_PRIVATE, 1, {0, 49963853}) = -1 ETIMEDOUT (Connection timed out)
futex(0x41695e58, FUTEX_WAKE_PRIVATE, 1) = 0
gettimeofday({1245245764, 242319}, NULL) = 0
gettimeofday({1245245764, 242360}, NULL) = 0
clock_gettime(CLOCK_REALTIME, {1245245764, 242395507}) = 0
futex(0x7f63fdb30114, FUTEX_WAIT_PRIVATE, 1, {0, 49964493} <unfinished ...>
<... restart_syscall resumed> ) = -1 ETIMEDOUT (Connection timed out)
futex(0x41c40be8, FUTEX_WAKE_PRIVATE, 1) = 0
clock_gettime(CLOCK_MONOTONIC, {88335, 123965121}) = 0
futex(0x7f63fd9e6e64, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7f63fd9e6e60, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1
clock_gettime(CLOCK_MONOTONIC, {88335, 124311258}) = 0
clock_gettime(CLOCK_MONOTONIC, {88335, 124348135}) = 0
gettimeofday({1245245764, 256967}, NULL) = 0
clock_gettime(CLOCK_REALTIME, {1245245764, 257003626}) = 0
futex(0x7f63fc95c304, FUTEX_WAIT_PRIVATE, 1, {0, 599963374} <unfinished ...>
<... futex resumed> ) = 0
futex(0x7f63fc9c5518, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f63fc7c8f54, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7f63fc7c8f50, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1
futex(0x7f63fd9e6e64, FUTEX_WAIT_PRIVATE, 7219, NULL <unfinished ...>
<... futex resumed> ) = 0
futex(0x7f63fd9e4e08, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f63fd3a08e4, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7f63fd3a08e0, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1
futex(0x7f63fc7c8f54, FUTEX_WAIT_PRIVATE, 7225, NULL <unfinished ...>
<... futex resumed> ) = 0
futex(0x7f63fde9e6a8, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f63fcf091e4, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7f63fcf091e0, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1
futex(0x7f63fd3a08e4, FUTEX_WAIT_PRIVATE, 7225, NULL <unfinished ...>
<... futex resumed> ) = 0
futex(0x7f63fcf089e8, FUTEX_WAKE_PRIVATE, 1) = 0
gettimeofday({1245245764, 257517}, NULL) = 0
futex(0x7f63fc9c7a34, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7f63fc9c7a30, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1
futex(0x7f63fdc10294, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7f63fdc10290, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1} <unfinished ...>
<... restart_syscall resumed> ) = 0
futex(0x7f63fc9c8a98, FUTEX_WAKE_PRIVATE, 1) = 0
gettimeofday({1245245764, 257689}, NULL) = 0
gettimeofday({1245245764, 257725}, NULL) = 0
clock_gettime(CLOCK_REALTIME, {1245245764, 257756521}) = 0
futex(0x7f63fc9c7a34, FUTEX_WAIT_PRIVATE, 9, {0, 599968479} <unfinished ...>
<... futex resumed> ) = 1
futex(0x7f63fcf091e4, FUTEX_WAIT_PRIVATE, 7379, NULL <unfinished ...>
<... futex resumed> ) = 0
futex(0x7f63fdc0fbd8, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f63fde63c64, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7f63fde63c60, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1
futex(0x7f63fdc10294, FUTEX_WAIT_PRIVATE, 7383, NULL <unfinished ...>
<... futex resumed> ) = 0
futex(0x7f63fc7c8168, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f63fd2fab44, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7f63fd2fab40, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1
futex(0x7f63fde63c64, FUTEX_WAIT_PRIVATE, 7289, NULL <unfinished ...>
<... futex resumed> ) = 0
futex(0x7f63fcf3e728, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f63fcee6ea4, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7f63fcee6ea0, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1
futex(0x7f63fd2fab44, FUTEX_WAIT_PRIVATE, 7283, NULL <unfinished ...>
<... futex resumed> ) = 0
futex(0x7f63fc960888, FUTEX_WAKE_PRIVATE, 1) = 0
futex(0x7f63fdbd83e4, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x7f63fdbd83e0, {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1}) = 1
futex(0x7f63fcee6ea4, FUTEX_WAIT_PRIVATE, 7297, NULL <unfinished ...>
<... futex resumed> ) = 0
futex(0x7f63fd8de328, FUTEX_WAKE_PRIVATE, 1) = 0
sendto(8, "\0\361\0&\20\376\200\0\0\0\0\0\0\2PV\377\376\236a\263\0\0\233-\1\2\243\254\355\0\5s"..., 1055, 0, {sa_family=AF_INET6, sin6_port=htons(59659), inet_pton(AF_INET6, "fe80::250:56ff:fe9e:61b3", &sin6_addr), sin6_flowinfo=32768, sin6_scope_id=if_nametoindex("lo")}, 28 <unfinished ...>
<... recvfrom resumed> "\0\361\0\"\20\376\200\0\0\0\0\0\0\2PV\377\376\236a\263\0\0\351\v\1\2\243\254\355\0\5s"..., 65535, 0, {sa_family=AF_INET6, sin6_port=htons(59659), inet_pton(AF_INET6, "fe80::250:56ff:fe9e:61b3", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=if_nametoindex("eth0")}, ) = 773
Any ideas why the application doesn't seem to be doing anything useful?
Application 2: A Tomcat server
A Tomcat server talking to a remote MySQL database has extremely poor responsiveness. Running strace on that process reveals the same behaviour as above. The machine doesn't appear to be doing anything at all, and yet the Java process doesn't respond to the user, or do anything other than repeatedly call futex().
Any ideas? I'm at my wits' end here, and the users are getting very grumpy...
Regards,
Tim