"Chunked"MemoryStream

Kar*_*nda 14 .net c#

我正在寻找一种MemoryStream的实现,它不是把内存分配为一整块,而是分配为一组较小的块.我想在内存(64位)中存储几GB的数据,并避免内存碎片带来的限制.

Dan*_*ker 10

像这样的东西:

/// <summary>
/// An in-memory, seekable <see cref="Stream"/> whose contents are held in a list of
/// separately allocated byte arrays ("chunks") instead of one contiguous buffer.
/// </summary>
/// <remarks>
/// A write that runs past the current end of the stream appends exactly one new chunk
/// sized to the bytes that did not fit into existing chunks; a write inside existing
/// data overwrites it in place. Chunks are never empty and are never resized.
/// </remarks>
class ChunkedMemoryStream : Stream
{
    private readonly List<byte[]> _chunks = new List<byte[]>();

    // Index of the chunk containing the current position; equals _chunks.Count at end of stream.
    private int _positionChunk;

    // Offset of the current position inside _chunks[_positionChunk]; always 0 at end of stream.
    private int _positionOffset;

    // Absolute position, kept in step with (_positionChunk, _positionOffset).
    private long _position;

    // Total number of bytes stored. Tracked as a long so that the length neither
    // overflows int nor needs a walk over every chunk each time it is queried.
    private long _length;

    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return true; }
    }

    public override bool CanWrite
    {
        get { return true; }
    }

    /// <summary>Does nothing: all data already lives in memory.</summary>
    public override void Flush() { }

    /// <summary>Gets the total number of bytes stored across all chunks.</summary>
    public override long Length
    {
        get { return _length; }
    }

    /// <summary>
    /// Gets or sets the absolute position within the stream.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The value is negative.</exception>
    /// <exception cref="OverflowException">The value is greater than <see cref="Length"/>.</exception>
    public override long Position
    {
        get
        {
            return _position;
        }
        set
        {
            if (value < 0)
            {
                throw new ArgumentOutOfRangeException("value", "Position must be non-negative.");
            }

            // Resolve the absolute position into (chunk, offset) using locals so that
            // the stream state is left untouched if the value turns out to be invalid.
            int chunkIndex = 0;
            long remaining = value;

            while (remaining != 0)
            {
                if (chunkIndex >= _chunks.Count)
                {
                    throw new OverflowException();
                }

                int chunkLength = _chunks[chunkIndex].Length;
                if (remaining < chunkLength)
                {
                    break;
                }

                remaining -= chunkLength;
                chunkIndex++;
            }

            _position = value;
            _positionChunk = chunkIndex;
            _positionOffset = (int)remaining; // always smaller than a chunk length here
        }
    }

    /// <summary>
    /// Copies up to <paramref name="count"/> bytes from the current position into
    /// <paramref name="buffer"/> and advances the position by the number of bytes copied.
    /// </summary>
    /// <param name="buffer">Destination array.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> at which to start storing data.</param>
    /// <param name="count">Maximum number of bytes to read.</param>
    /// <returns>The number of bytes actually read; 0 at end of stream.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or <paramref name="count"/> is negative.</exception>
    /// <exception cref="ArgumentException">The range does not fit inside <paramref name="buffer"/>.</exception>
    public override int Read(byte[] buffer, int offset, int count)
    {
        CheckBufferRange(buffer, offset, count);

        int result = 0;
        while ((count != 0) && (_positionChunk != _chunks.Count))
        {
            byte[] chunk = _chunks[_positionChunk];
            int fromChunk = Math.Min(count, chunk.Length - _positionOffset);

            Array.Copy(chunk, _positionOffset, buffer, offset, fromChunk);
            offset += fromChunk;
            count -= fromChunk;
            result += fromChunk;

            Advance(chunk, fromChunk);
        }
        return result;
    }

    /// <summary>
    /// Moves the position relative to <paramref name="origin"/>. The resulting position
    /// is clamped to the range [0, <see cref="Length"/>].
    /// </summary>
    /// <param name="offset">Signed byte offset relative to <paramref name="origin"/>.</param>
    /// <param name="origin">Reference point for <paramref name="offset"/>.</param>
    /// <returns>The new (clamped) absolute position.</returns>
    /// <exception cref="ArgumentException"><paramref name="origin"/> is not a valid <see cref="SeekOrigin"/>.</exception>
    public override long Seek(long offset, SeekOrigin origin)
    {
        long newPos;

        switch (origin)
        {
            case SeekOrigin.Begin:
                newPos = offset;
                break;
            case SeekOrigin.Current:
                newPos = _position + offset;
                break;
            case SeekOrigin.End:
                // The offset is added to the end, so negative offsets move backwards
                // from the end of the stream (standard Stream.Seek semantics).
                newPos = _length + offset;
                break;
            default:
                throw new ArgumentException("Invalid seek origin.", "origin");
        }

        Position = Math.Max(0, Math.Min(newPos, _length));
        return _position;
    }

    /// <summary>Not implemented: the stream only grows through writes.</summary>
    public override void SetLength(long value)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Writes <paramref name="count"/> bytes from <paramref name="buffer"/> at the current
    /// position, overwriting existing data first and appending one new chunk for whatever
    /// extends past the end of the stream.
    /// </summary>
    /// <param name="buffer">Source array.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> of the first byte to write.</param>
    /// <param name="count">Number of bytes to write.</param>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or <paramref name="count"/> is negative.</exception>
    /// <exception cref="ArgumentException">The range does not fit inside <paramref name="buffer"/>.</exception>
    public override void Write(byte[] buffer, int offset, int count)
    {
        CheckBufferRange(buffer, offset, count);

        // Overwrite as much as fits into the chunks that already exist.
        while ((count != 0) && (_positionChunk != _chunks.Count))
        {
            byte[] chunk = _chunks[_positionChunk];
            int toChunk = Math.Min(count, chunk.Length - _positionOffset);

            Array.Copy(buffer, offset, chunk, _positionOffset, toChunk);
            offset += toChunk;
            count -= toChunk;

            Advance(chunk, toChunk);
        }

        // Whatever is left extends the stream: store it as a single new chunk.
        if (count != 0)
        {
            byte[] chunk = new byte[count];
            Array.Copy(buffer, offset, chunk, 0, count);
            _chunks.Add(chunk);
            _positionChunk = _chunks.Count;
            _positionOffset = 0;
            _position += count;
            _length += count;
        }
    }

    // Moves the cursor forward by 'bytes' within 'chunk', stepping to the start of the
    // next chunk only once the current one has been consumed completely.
    private void Advance(byte[] chunk, int bytes)
    {
        _position += bytes;
        _positionOffset += bytes;

        if (_positionOffset == chunk.Length)
        {
            _positionOffset = 0;
            _positionChunk++;
        }
    }

    // Validates the (buffer, offset, count) triple shared by Read and Write before any
    // stream state is modified.
    private static void CheckBufferRange(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
        {
            throw new ArgumentNullException("buffer");
        }

        if (offset < 0)
        {
            throw new ArgumentOutOfRangeException("offset", "Offset must be non-negative.");
        }

        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count", "Count must be non-negative.");
        }

        if (buffer.Length - offset < count)
        {
            throw new ArgumentException("Offset and count exceed the bounds of the buffer.");
        }
    }
}

/// <summary>
/// Smoke test for <see cref="ChunkedMemoryStream"/>: appends data in several writes,
/// reads it back, overwrites the beginning, and appends again, asserting the length,
/// position, and contents after each step.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        using (ChunkedMemoryStream cms = new ChunkedMemoryStream())
        {
            // A fresh stream is empty and positioned at the start.
            Debug.Assert(cms.Length == 0);
            Debug.Assert(cms.Position == 0);

            cms.Position = 0;

            byte[] helloworld = Encoding.UTF8.GetBytes("hello world");

            // Three appends produce three separate chunks (3 + 3 + 5 bytes).
            cms.Write(helloworld, 0, 3);
            cms.Write(helloworld, 3, 3);
            cms.Write(helloworld, 6, 5);

            Debug.Assert(cms.Length == 11);
            Debug.Assert(cms.Position == 11);

            cms.Position = 0;

            // Read everything back across the chunk boundaries, into the middle of b.
            byte[] b = new byte[20];
            int read = cms.Read(b, 3, (int)cms.Length);
            Debug.Assert(read == 11);
            Debug.Assert(b.Skip(3).Take(11).SequenceEqual(helloworld));

            // Overwriting from the start must not change the length.
            cms.Position = 0;
            cms.Write(Encoding.UTF8.GetBytes("seeya"), 0, 5);

            Debug.Assert(cms.Length == 11);
            Debug.Assert(cms.Position == 5);

            cms.Position = 0;
            // Cast to int (not byte) so the request is not truncated for lengths >= 256.
            read = cms.Read(b, 0, (int)cms.Length);
            Debug.Assert(read == 11);
            Debug.Assert(b.Take(11).SequenceEqual(Encoding.UTF8.GetBytes("seeya world")));

            Debug.Assert(cms.Length == 11);
            Debug.Assert(cms.Position == 11);

            // Writing at the end appends a new chunk and grows the stream.
            cms.Write(Encoding.UTF8.GetBytes(" again"), 0, 6);

            Debug.Assert(cms.Length == 17);
            Debug.Assert(cms.Position == 17);

            cms.Position = 0;
            read = cms.Read(b, 0, (int)cms.Length);
            Debug.Assert(read == 17);
            Debug.Assert(b.Take(17).SequenceEqual(Encoding.UTF8.GetBytes("seeya world again")));
        }
    }
}
Run Code Online (Sandbox Code Playgroud)

  • 另请参阅:http://referencesource.microsoft.com/#System.Runtime.Remoting/channels/core/chunkedmemorystream.cs (2认同)

chu*_*ckj 8

您需要首先确定虚拟地址碎片是否是问题.

如果你使用的是64位机器(你似乎表明是这样),我非常怀疑这会成为问题.每个64位进程几乎都可以使用整个64位虚拟地址空间,你唯一需要担心的是虚拟地址空间碎片,而不是物理内存碎片(那是操作系统需要操心的事).操作系统的内存管理器本来就以页为单位管理内存.在可预见的未来,你不会在物理内存耗尽之前先耗尽虚拟地址空间.在我们退休之前,这一点不太可能改变.

如果你只有32位地址空间,那么在GB范围内分配连续的大块内存时,你很快就会遇到碎片问题.CLR中没有现成的分块分配内存流.ASP.NET中有一个(出于其他原因),但无法从外部访问.如果你必须走这条路,最好自己写一个,因为你的应用程序的使用模式不太可能与其他许多应用相似,而且设法把数据塞进32位地址空间很可能会成为你的性能瓶颈.

如果你要处理GB级的数据,我强烈建议你要求使用64位进程.无论你多么聪明,它都会比手工编写的、用于应对32位地址空间碎片的方案做得好得多.


Eri*_*Law 8

Bing团队发布了RecyclableMemoryStream,并在此处撰文做了介绍.他们列举的好处是:

  1. 使用池化缓冲区消除大对象堆分配
  2. 大幅减少第2代GC的次数,并大幅缩短因GC造成的停顿时间
  3. 通过限制池大小避免内存泄漏
  4. 避免内存碎片
  5. 提供出色的可调试性
  6. 提供性能跟踪指标