Ken*_*ny6 6 freeze large-data mongodb rocksdb
We have large MongoDB database (about 1,4mln collections), MongoDB 3.0, engine rocksDB, operating system Ubuntu 14.04.
This DB is located on virtual machine (VmWare vCloud) with 16 cores and 108 GB RAM (currently mongoDB used 70GB of memory without swap).
Production setup options:
DB stats:
{
"db" : "ctp",
"collections" : 1369486,
"objects" : 20566852,
"avgObjSize" : 1126.82749999854,
"dataSize" : 23175294422,
"storageSize" : 23231888384,
"numExtents" : 0,
"indexes" : 6686175,
"indexSize" : 685981393,
"ok" : 1
}
Run Code Online (Sandbox Code Playgroud)
Sample collections sizes:
{
"ns" : "ctp._cf123_ct49_dfc-r_dtc-r_tof2_groupat",
"count" : 33,
"size" : 38172,
"avgObjSize" : 1156,
"storageSize" : 38144,
"capped" : false,
"nindexes" : 5,
"totalIndexSize" : 6312,
"indexSizes" : {
"_id_" : 18,
"exAt" : 16,
"unique" : 6246,
"_smp" : 10,
"_smpdf" : 22
},
"ok" : 1
}
{
"ns" : "ctp._afpoznan123_atlondyn49_df2016-09_dt2016-09_tof2_groupdfdt",
"count" : 188,
"size" : 208677,
"avgObjSize" : 1109,
"storageSize" : 208640,
"capped" : false,
"nindexes" : 5,
"totalIndexSize" : 7945,
"indexSizes" : {
"_id_" : 2845,
"exAt" : 256,
"_smp" : 160,
"_smpdf" : 352,
"unique" : 4332
},
"ok" : 1
}
{
"ns" : "ctp._cf123_ct42_dfc-r_dtc-r_tof2_groupat",
"count" : 27,
"size" : 30400,
"avgObjSize" : 1125,
"storageSize" : 30208,
"capped" : false,
"nindexes" : 5,
"totalIndexSize" : 84,
"indexSizes" : {
"_id_" : 18,
"exAt" : 16,
"unique" : 18,
"_smp" : 10,
"_smpdf" : 22
},
"ok" : 1
}
Run Code Online (Sandbox Code Playgroud)
Periodically every 5 minutes is running script which writes to these collections and creates new one if this collection doesn't exists (collections names are based on data which are inside these collections) and creating indexes.
We've noticed that this server has some freezes during writing data to collections. This kind of freeze can take from 5 to 60 seconds.
有没有人遇到过这个问题,可以帮助我们?
以下是冻结时刻的一些日志:
db.currentOP();
"opid" : 22717868,
"active" : false,
"op" : "query",
"ns" : "ctp._cf115_atboma25_dfc-r_dtc-r_tof2_groupdfym",
"query" : {
"$query" : {
"T#df" : {
"$lt" : "2017-02-28"
}
},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33832",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(22004831)
}
}
}
},
{
"desc" : "conn135907",
"threadId" : "0xc3e5d64e0",
"connectionId" : 135907,
"opid" : 22719375,
"active" : true,
"secs_running" : 0,
"microsecs_running" : NumberLong(223601),
"op" : "query",
"ns" : "top_search.top_searches",
"query" : {
"$msg" : "query not recording (too large)"
},
"planSummary" : "IXSCAN { T#df: 1, T#dt: 1 }",
"client" : "192.168.1.33:33648",
"numYields" : 170,
"locks" : {
"Global" : "r",
"Database" : "r",
"Collection" : "r"
},
"waitingForLock" : false,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(342)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(171)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(171)
}
}
}
},
{
"desc" : "conn135959",
"threadId" : "0x10d4445260",
"connectionId" : 135959,
"opid" : 22718533,
"active" : false,
"op" : "query",
"ns" : "ctp._afoxford-house23_attamarindo32_dfc-r_dtc-r_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:34022",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(15003580)
}
}
}
},
{
"desc" : "conn135829",
"threadId" : "0x10d4445740",
"connectionId" : 135829,
"opid" : 22717923,
"active" : false,
"op" : "query",
"ns" : "ctp._ct123_dfc-r_dtc-r_tof2_groupdfym",
"query" : {
"$query" : {
"T#df" : {
"$lt" : "2017-02-28"
}
},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33026",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(21004810)
}
}
}
},
{
"desc" : "conn135781",
"threadId" : "0x2d678e0",
"connectionId" : 135781,
"opid" : 22718920,
"active" : false,
"op" : "query",
"ns" : "ctp._cf1147_atrostock36_df2016-06_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:60874",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(12002770)
}
}
}
},
{
"desc" : "conn135870",
"threadId" : "0xd04ed5d40",
"connectionId" : 135870,
"opid" : 22719172,
"active" : false,
"op" : "query",
"ns" : "ctp._cf61_atpristina131_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33369",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(7001590)
}
}
}
},
{
"desc" : "conn135687",
"threadId" : "0xc3e5d7380",
"connectionId" : 135687,
"opid" : 22717925,
"active" : false,
"op" : "query",
"ns" : "ctp._cf105_athana156_df2016-06_dt2016-06_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:60022",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(21003871)
}
}
}
},
{
"desc" : "conn135754",
"threadId" : "0xd04ed5860",
"connectionId" : 135754,
"opid" : 22718485,
"active" : false,
"op" : "query",
"ns" : "ctp._cf5_atdhaka1113_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:60603",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(15503084)
}
}
}
},
{
"desc" : "conn135644",
"threadId" : "0xc3e5d9c20",
"connectionId" : 135644,
"opid" : 22719073,
"active" : false,
"op" : "query",
"ns" : "ctp._afgenua71_ataarhus37_dfc-r_dtc-r_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:59698",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(7501602)
}
}
}
},
{
"desc" : "conn135891",
"threadId" : "0xd04ed7a80",
"connectionId" : 135891,
"opid" : 22719284,
"active" : false,
"op" : "query",
"ns" : "ctp._attianjin30_dfc-r_dtc-r_tof2_groupcf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33530",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(3000658)
}
}
}
},
{
"desc" : "conn135673",
"threadId" : "0xd04ed6220",
"connectionId" : 135673,
"opid" : 22718185,
"active" : false,
"op" : "query",
"ns" : "ctp._afwroclaw123_atlondyn49_df2016-06_dt2016-06_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:59925",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(16503737)
}
}
}
},
{
"desc" : "conn135989",
"threadId" : "0x10d44443c0",
"connectionId" : 135989,
"opid" : 22719240,
"active" : false,
"op" : "query",
"ns" : "ctp._cf28_atmarakesz93_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:34367",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(4500947)
}
}
}
},
{
"desc" : "conn135410",
"threadId" : "0x2d66220",
"connectionId" : 135410,
"opid" : 22717853,
"active" : true,
"secs_running" : 22,
"microsecs_running" : NumberLong(22406019),
"op" : "query",
"ns" : "ctp.$cmd",
"query" : {
"createIndexes" : "_cf71_df2016-07_dt2016-11_tof2_groupct",
"indexes" : [
{
"key" : {
"expireAt" : 1
},
"name" : "exAt",
"background" : true,
"expireAfterSeconds" : 0
}
]
},
"client" : "0.0.0.0:0",
"numYields" : 0,
"locks" : {
"Global" : "w",
"Database" : "W"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(376227),
"w" : NumberLong(15477)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(180375),
"w" : NumberLong(15476),
"W" : NumberLong(1)
},
"acquireWaitCount" : {
"W" : NumberLong(1)
},
"timeAcquiringMicros" : {
"W" : NumberLong(22004935)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(180375),
"w" : NumberLong(15476)
}
}
}
},
{
"desc" : "conn135961",
"threadId" : "0x10d4442b60",
"connectionId" : 135961,
"opid" : 22718537,
"active" : false,
"op" : "query",
"ns" : "ctp._cf5_atattawapiskat23_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:34029",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(15002978)
}
}
}
},
{
"desc" : "conn135905",
"threadId" : "0xc3e5d6000",
"connectionId" : 135905,
"opid" : 22718186,
"active" : false,
"op" : "query",
"ns" : "ctp._afwarszawa123_atdubrownik61_df2016-08_dt2016-08_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:33638",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(16503305)
}
}
}
}
]
Run Code Online (Sandbox Code Playgroud)
db.serverStatus()["rocksdb"];
{
"stats" : [
"",
"** Compaction Stats [default] **",
"Level Files Size(MB) Score Read(GB) Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) Comp(cnt) Avg(sec) Stall(cnt) KeyIn KeyDrop",
"---------------------------------------------------------------------------------------------------------------------------------------------------------------------",
" L0 0/0 0.00 0.0 0.0 0.0 0.0 1.4 1.4 0.0 0.0 0.0 120.1 12 39 0.312 0 0 0",
" L4 0/0 0.00 0.0 1.8 1.8 0.0 1.7 1.7 0.0 1.0 102.0 99.7 18 11 1.606 7 21M 153K",
" L5 15/0 620.47 1.0 6.6 1.4 5.2 5.5 0.3 0.0 3.9 44.4 37.0 152 25 6.086 0 110M 840K",
" L6 106/0 6401.43 0.0 3.5 0.3 3.3 3.3 -0.0 0.0 12.6 25.9 23.7 140 7 20.057 0 162M 14M",
" Sum 121/0 7021.90 0.0 11.9 3.4 8.5 11.9 3.4 0.0 8.3 37.8 37.8 322 82 3.932 7 295M 15M",
" Int 0/0 0.00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0.000 0 0 0",
"Flush(GB): cumulative 1.429, interval 0.000",
"Stalls(count): 0 level0_slowdown, 0 level0_slowdown_with_compaction, 0 level0_numfiles, 0 level0_numfiles_with_compaction, 0 pending_compaction_bytes, 0 memtable_compaction, 7 leveln_slowdown_soft, 0 leveln_slowdown_hard",
"",
"** DB Stats **",
"Uptime(secs): 34952.0 total, 0.2 interval",
"Cumulative writes: 4990K writes, 17M keys, 4989K batches, 1.0 writes per batch, ingest: 2.02 GB, 0.06 MB/s",
"Cumulative WAL: 4990K writes, 0 syncs, 4990122.00 writes per sync, written: 2.02 GB, 0.06 MB/s",
"Cumulative compaction: 11.90 GB write, 0.35 MB/s write, 11.90 GB read, 0.35 MB/s read, 322.4 seconds",
"Cumulative stall: 00:00:3.548 H:M:S, 0.0 percent",
"Interval writes: 0 writes, 0 keys, 0 batches, 0.0 writes per batch, ingest: 0.00 MB, 0.00 MB/s",
"Interval WAL: 0 writes, 0 syncs, 0.00 writes per sync, written: 0.00 MB, 0.00 MB/s",
"Interval compaction: 0.00 GB write, 0.00 MB/s write, 0.00 GB read, 0.00 MB/s read, 0.0 seconds",
"Interval stall: 00:00:0.000 H:M:S, 0.0 percent"
],
"num-immutable-mem-table" : "0",
"mem-table-flush-pending" : "0",
"compaction-pending" : "0",
"background-errors" : "0",
"cur-size-active-mem-table" : "33MB",
"cur-size-all-mem-tables" : "33MB",
"num-entries-active-mem-table" : "185495",
"num-entries-imm-mem-tables" : "0",
"estimate-table-readers-mem" : "91MB",
"num-snapshots" : "1",
"oldest-snapshot-time" : "1465911051",
"num-live-versions" : "1",
"total-live-recovery-units" : 60,
"block-cache-usage" : "34GB",
"transaction-engine-keys" : NumberLong(4210),
"transaction-engine-snapshots" : NumberLong(1),
"thread-status" : []
}
Run Code Online (Sandbox Code Playgroud)
db.serverStatus()['globalLock'];
{
"totalTime" : NumberLong(34952090000),
"currentQueue" : {
"total" : 57,
"readers" : 56,
"writers" : 1
},
"activeClients" : {
"total" : 124,
"readers" : 57,
"writers" : 1
}
}
Run Code Online (Sandbox Code Playgroud)
最好的祝福
创建新的集合没有意义。这是一个工程缺陷。这个数量的集合肯定会冻结数据库服务器,因为它被迫遍历可用集合的列表并检查列表中是否存在集合。另外,我相信还涉及一些操作,例如元数据更新。
从操作日志可以清楚地看出,构建索引需要大量时间
"query" : {
"createIndexes" : "_cf71_df2016-07_dt2016-11_tof2_groupct",
"indexes" : [
{
"key" : {
"expireAt" : 1
},
"name" : "exAt",
"background" : true,
"expireAfterSeconds" : 0
}
]
}
Run Code Online (Sandbox Code Playgroud)
此外,获取锁之类的事情需要时间,这对于创建新的命名空间和索引构建是必要的。同样,像这样的参数"expireAfterSeconds" : 0
在这样的规模上并不明智。基本上,您必须每 60 秒运行 1.4M 个计时器来查找和修剪过期记录,并确保重建索引(请参阅 mongostat 上的那些删除)。
请考虑进行更改以减少集合数量或在部署之间拆分集合。您还可以删除不活动集合的索引。另外,明智的做法是删除所有空的和未使用的集合。
为了立即解决问题,您必须找到当前的瓶颈:RAM、CPU 或 IOpS。你有额外的RAM,你可以把它给MongoDB,它将有助于缓存更多并避免不必要的读取。通过测量您的 IOpS,iostat
了解您的驱动器的繁忙程度。
归档时间: |
|
查看次数: |
965 次 |
最近记录: |