我正在寻找MemoryStream的实现,它不会将内存分配为一个大块,而是一个块的集合.我想在内存(64位)中存储几GB的数据,并避免内存碎片的限制.
Dan*_*ker 10
像这样的东西:
class ChunkedMemoryStream : Stream
{
private readonly List<byte[]> _chunks = new List<byte[]>();
private int _positionChunk;
private int _positionOffset;
private long _position;
public override bool CanRead
{
get { return true; }
}
public override bool CanSeek
{
get { return true; }
}
public override bool CanWrite
{
get { return true; }
}
public override void Flush() { }
public override long Length
{
get { return _chunks.Sum(c => c.Length); }
}
public override long Position
{
get
{
return _position;
}
set
{
_position = value;
_positionChunk = 0;
while (_positionOffset != 0)
{
if (_positionChunk >= _chunks.Count)
throw new OverflowException();
if (_positionOffset < _chunks[_positionChunk].Length)
return;
_positionOffset -= _chunks[_positionChunk].Length;
_positionChunk++;
}
}
}
public override int Read(byte[] buffer, int offset, int count)
{
int result = 0;
while ((count != 0) && (_positionChunk != _chunks.Count))
{
int fromChunk = Math.Min(count, _chunks[_positionChunk].Length - _positionOffset);
if (fromChunk != 0)
{
Array.Copy(_chunks[_positionChunk], _positionOffset, buffer, offset, fromChunk);
offset += fromChunk;
count -= fromChunk;
result += fromChunk;
_position += fromChunk;
}
_positionOffset = 0;
_positionChunk++;
}
return result;
}
public override long Seek(long offset, SeekOrigin origin)
{
long newPos = 0;
switch (origin)
{
case SeekOrigin.Begin:
newPos = offset;
break;
case SeekOrigin.Current:
newPos = Position + offset;
break;
case SeekOrigin.End:
newPos = Length - offset;
break;
}
Position = Math.Max(0, Math.Min(newPos, Length));
return newPos;
}
public override void SetLength(long value)
{
throw new NotImplementedException();
}
public override void Write(byte[] buffer, int offset, int count)
{
while ((count != 0) && (_positionChunk != _chunks.Count))
{
int toChunk = Math.Min(count, _chunks[_positionChunk].Length - _positionOffset);
if (toChunk != 0)
{
Array.Copy(buffer, offset, _chunks[_positionChunk], _positionOffset, toChunk);
offset += toChunk;
count -= toChunk;
_position += toChunk;
}
_positionOffset = 0;
_positionChunk++;
}
if (count != 0)
{
byte[] chunk = new byte[count];
Array.Copy(buffer, offset, chunk, 0, count);
_chunks.Add(chunk);
_positionChunk = _chunks.Count;
_position += count;
}
}
}
class Program
{
static void Main(string[] args)
{
ChunkedMemoryStream cms = new ChunkedMemoryStream();
Debug.Assert(cms.Length == 0);
Debug.Assert(cms.Position == 0);
cms.Position = 0;
byte[] helloworld = Encoding.UTF8.GetBytes("hello world");
cms.Write(helloworld, 0, 3);
cms.Write(helloworld, 3, 3);
cms.Write(helloworld, 6, 5);
Debug.Assert(cms.Length == 11);
Debug.Assert(cms.Position == 11);
cms.Position = 0;
byte[] b = new byte[20];
cms.Read(b, 3, (int)cms.Length);
Debug.Assert(b.Skip(3).Take(11).SequenceEqual(helloworld));
cms.Position = 0;
cms.Write(Encoding.UTF8.GetBytes("seeya"), 0, 5);
Debug.Assert(cms.Length == 11);
Debug.Assert(cms.Position == 5);
cms.Position = 0;
cms.Read(b, 0, (byte) cms.Length);
Debug.Assert(b.Take(11).SequenceEqual(Encoding.UTF8.GetBytes("seeya world")));
Debug.Assert(cms.Length == 11);
Debug.Assert(cms.Position == 11);
cms.Write(Encoding.UTF8.GetBytes(" again"), 0, 6);
Debug.Assert(cms.Length == 17);
Debug.Assert(cms.Position == 17);
cms.Position = 0;
cms.Read(b, 0, (byte)cms.Length);
Debug.Assert(b.Take(17).SequenceEqual(Encoding.UTF8.GetBytes("seeya world again")));
}
}
Run Code Online (Sandbox Code Playgroud)
您需要首先确定虚拟地址碎片是否是问题.
如果你是64位机器(你似乎表明你是),我非常怀疑它是什么.每个64位进程几乎都有可用的整个64位虚拟内存空间,您唯一担心的是虚拟地址空间碎片而不是物理内存碎片(这是操作系统必须担心的).操作系统内存管理器已经将内存分页.对于可预见的未来,在物理内存耗尽之前,您不会耗尽虚拟地址空间.在我们退休之前,这不太可能发生变化.
如果您有32位地址空间,然后在GB ramge中分配连续的大块内存,您将很快遇到碎片问题.在CLR中没有库存块分配内存流.ASP.NET中有一个(由于其他原因)但它无法访问.如果你必须走这条路,你可能最好自己写一个,因为你的应用程序的使用模式不太可能与许多其他类似,并且试图将你的数据放入32位地址空间可能是你的性能瓶颈.
如果您正在操作GB数据,我强烈建议您需要64位进程.无论您是多么敏锐,它都会比手动解决方案更好地完成32位地址空间碎片.
Bing团队发布了RecyclableMemoryStream并在此处写了相关内容.他们引用的好处是: