...
BugZero found this defect 2720 days ago.
I have a three-member replica set MongoDB cluster running version 3.2.10. Recently we have been having an issue with the OOM killer killing the mongod process. We upgraded the instance to c3.2xlarge in AWS and the crash still happened. We did not have swap space configured and have configured it after the crash. mm shows about 300 connections to mongo. I am not clear why mongo eats up so much RAM, in spite of the DB size being so small, that the OOM killer kills it. I did see an expensive query that ran for 2 secs, which might have done a full table scan and might have run just before the OOM killer killed the process. The incident happened at about 9:54 AM PST on Aug 26. Attaching mongo logs and diagnostic data: https://drive.google.com/file/d/0B0DcYD8YgYOJMmZTcVNLUXlTdDQ/view?usp=sharing https://drive.google.com/file/d/0B0DcYD8YgYOJYjhMczRxOFlRNEU/view?usp=sharing https://cloud.mongodb.com/v2/50366375f1a5dd0b002fab66#host/replicaSet/5615733ee4b009c743f75edf region_qa_11:SECONDARY> db.runCommand( { buildInfo: 1 } ) { "version" : "3.2.10", "gitVersion" : "79d9b3ab5ce20f51c272b4411202710a082d0317", "modules" : [ ], "allocator" : "tcmalloc", "javascriptEngine" : "mozjs", "sysInfo" : "deprecated", "versionArray" : [ 3, 2, 10, 0 ], "openssl" : { "running" : "OpenSSL 1.0.0-fips 29 Mar 2010", "compiled" : "OpenSSL 1.0.1e-fips 11 Feb 2013" }, "buildEnvironment" : { "distmod" : "amazon", "distarch" : "x86_64", "cc" : "/opt/mongodbtoolchain/bin/gcc: gcc (GCC) 4.8.2", "ccflags" : "-fno-omit-frame-pointer -fPIC -fno-strict-aliasing -ggdb -pthread -Wall -Wsign-compare -Wno-unknown-pragmas -Winvalid-pch -Werror -O2 -Wno-unused-local-typedefs -Wno-unused-function -Wno-deprecated-declarations -Wno-unused-but-set-variable -Wno-missing-braces -fno-builtin-memcmp", "cxx" : "/opt/mongodbtoolchain/bin/g++: g++ (GCC) 4.8.2", "cxxflags" : "-Wnon-virtual-dtor -Woverloaded-virtual -Wno-maybe-uninitialized -std=c++11", "linkflags" : "-fPIC -pthread -Wl,-z,now -rdynamic -fuse-ld=gold -Wl,-z,noexecstack -Wl,--warn-execstack", "target_arch" : 
"x86_64", "target_os" : "linux" }, "bits" : 64, "debug" : false, "maxBsonObjectSize" : 16777216, "storageEngines" : [ "devnull", "ephemeralForTest", "mmapv1", "wiredTiger" ], "ok" : 1 } db stats { "db" : "region", "collections" : 6, "objects" : 5491878, "avgObjSize" : 406.2433517277696, "dataSize" : 2231038926, "storageSize" : 1094828032, "numExtents" : 0, "indexes" : 43, "indexSize" : 875229184, "ok" : 1 }
thomas.schubert commented on Fri, 29 Sep 2017 18:36:21 +0000: Hi snarasimhan, We still need additional information to diagnose the problem. If this is still an issue for you, would you please provide us access to the syslog? Thank you, Kelsey thomas.schubert commented on Thu, 7 Sep 2017 18:50:38 +0000: Hi snarasimhan, I do not have access to view these files. Would you please upload the requested files to our secure upload portal? Thank you, Thomas snarasimhan@edmunds.com commented on Mon, 28 Aug 2017 16:25:16 +0000: https://docs.google.com/a/edmunds.com/document/d/1JUaKy0AmntEhqND6jaGCUPfEgsYA2qIR66BS1GilKag/edit?usp=sharing thomas.schubert commented on Mon, 28 Aug 2017 16:12:35 +0000: Hi snarasimhan, Would you please provide the syslog covering this event? Thank you, Thomas snarasimhan@edmunds.com commented on Sun, 27 Aug 2017 05:54:13 +0000: I was using the default WiredTiger cacheSize; I have now restricted the cacheSize to 3G (slightly more than the compressed data size).