netty: reduce default worker ELG thread number by half, to number of CPU cores (#6439)

This cuts the number of per-thread direct buffer allocations by half, especially under light traffic. It also cuts the number of per-thread file descriptors by half.

Internal benchmark results (median of 5 runs) don't show any significant change:
```
                          Before (STDEV)           After (STDEV)
grpc-java-java-multi-qps-integrity_only
Actual QPS               711,004 (6,246)         704,372 (6,873) 
QPS per Client CPU        23,921   (252)          24,188   (252)

grpc-java-java-multi-throughput-integrity_only
Actual QPS                35,326    (48)          35,294    (29) 
QPS per Client CPU         3,362    (17)           3,440    (13)

grpc-java-java-single-latency-integrity_only
Median latency (us)          127  (2.77)             129  (3.13)

grpc-java-java-single-throughput-integrity_only
Actual QPS                   581 (11.60)             590  (7.08)
QPS per Client CPU           490 (10.98)             498  (5.63)
```
This commit is contained in:
Kun Zhang 2019-11-18 16:26:37 -08:00 committed by GitHub
parent 94eb93bd3a
commit d77419557a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 11 additions and 1 deletions

View File

@ -49,6 +49,7 @@ import io.netty.channel.socket.nio.NioSocketChannel;
import io.netty.handler.codec.http2.Http2Exception; import io.netty.handler.codec.http2.Http2Exception;
import io.netty.handler.codec.http2.Http2Headers; import io.netty.handler.codec.http2.Http2Headers;
import io.netty.util.AsciiString; import io.netty.util.AsciiString;
import io.netty.util.NettyRuntime;
import io.netty.util.concurrent.DefaultThreadFactory; import io.netty.util.concurrent.DefaultThreadFactory;
import java.io.IOException; import java.io.IOException;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
@ -372,7 +373,16 @@ class Utils {
/**
 * Creates a resource describing an event loop group.
 *
 * @param numEventLoops requested number of event loop threads. When this is {@code 0} and the
 *     {@code io.netty.eventLoopThreads} system property is not set, the number of threads is
 *     taken from {@code NettyRuntime.availableProcessors()}; otherwise the given value is
 *     stored unchanged.
 * @param name name stored for this resource
 * @param eventLoopGroupType the kind of event loop group stored for this resource
 */
DefaultEventLoopGroupResource(
    int numEventLoops, String name, EventLoopGroupType eventLoopGroupType) {
  this.name = name;
  // See the implementation of MultithreadEventLoopGroup. DEFAULT_EVENT_LOOP_THREADS there
  // defaults to NettyRuntime.availableProcessors() * 2. We don't think we need that many
  // threads. The overhead of a thread includes file descriptors and at least one chunk
  // allocation from PooledByteBufAllocator. Here we reduce the default number of threads by
  // half.
  //
  // The override only applies when the caller asked for 0 threads and the user has not
  // configured io.netty.eventLoopThreads explicitly, so an explicit setting is never overridden.
  if (numEventLoops == 0 && System.getProperty("io.netty.eventLoopThreads") == null) {
    this.numEventLoops = NettyRuntime.availableProcessors();
  } else {
    this.numEventLoops = numEventLoops;
  }
  this.eventLoopGroupType = eventLoopGroupType;
}