有没有用python反序列化的库,用java序列化

Pra*_*abu 12 python java serialization

有没有用python反序列化的库,用java序列化?

Col*_*son 9

这是一个非常非常粗略的实现,不需要任何导入.这里棘手的是一些类有自定义序列化器,需要原始代码反序列化.此外,我的特定文件是gzip'd虽然我没有包含代码.我用这两页作为参考:

http://www.javaworld.com/community/node/2915 http://docs.oracle.com/javase/7/docs/platform/serialization/spec/protocol.html

#!/usr/bin/python

def parse(f):
    h = lambda s: ' '.join('%.2X' % ord(x) for x in s) # format as hex
    p = lambda s: sum(ord(x)*256**i for i, x in enumerate(reversed(s))) # parse integer
    magic = f.read(2)
    assert magic == '\xAC\xED', h(magic) # STREAM_MAGIC
    assert p(f.read(2)) == 5 # STREAM_VERSION
    handles = []
    def parse_obj():
        b = f.read(1)
        if not b:
            raise StopIteration # not necessarily the best thing to throw here.
        if b == '\x70': # p TC_NULL
            return None
        elif b == '\x71': # q TC_REFERENCE
            handle = p(f.read(4)) - 0x7E0000 # baseWireHandle
            o = handles[handle]
            return o[1]
        elif b == '\x74': # t TC_STRING
            string = f.read(p(f.read(2))).decode('utf-8')
            handles.append(('TC_STRING', string))
            return string
        elif b == '\x75': # u TC_ARRAY
            data = []
            cls = parse_obj()
            size = p(f.read(4))
            handles.append(('TC_ARRAY', data))
            assert cls['_name'] in ('[B', '[I'), (cls['_name'], size, f.read(50))
            for x in range(size):
                data.append(f.read({'[B': 1, '[I': 4}[cls['_name']]))
            return data
        elif b == '\x7E': # ~ TC_ENUM
            enum = {}
            enum['_cls'] = parse_obj()
            handles.append(('TC_ENUM', enum))
            enum['_name'] = parse_obj()
            return enum
        elif b == '\x72': # r TC_CLASSDESC
            cls = {'fields': []}
            full_name = f.read(p(f.read(2)))
            cls['_name'] = full_name.split('.')[-1] # i don't care about full path
            f.read(8) # uid
            cls['flags'] = f.read(1)
            handles.append(('TC_CLASSDESC', cls))
            assert cls['flags'] in ('\2', '\3', '\x0C', '\x12'), h(cls['flags'])
            b = f.read(2)
            for i in range(p(b)):
                typ = f.read(1)
                name = f.read(p(f.read(2)))
                fcls = parse_obj() if typ in 'L[' else ''
                cls['fields'].append((name, typ, fcls.split('/')[-1])) # don't care about full path
            b = f.read(1)
            assert b == '\x78', h(b)
            cls['parent'] = parse_obj()
            return cls
        # TC_OBJECT
        assert b == '\x73', (h(b), h(f.read(4)), f.read(50))
        obj = {}
        obj['_cls'] = parse_obj()
        obj['_name'] = obj['_cls']['_name']
        handle = len(handles)
        parents = [obj['_cls']]
        while parents[0]['parent']:
            parents.insert(0, parents[0]['parent'])
        handles.append(('TC_OBJECT', obj))
        for cls in parents:
            for name, typ, fcls in cls['fields'] if cls['flags'] in ('\2', '\3') else []:
                if typ == 'I': # Integer
                    obj[name] = p(f.read(4))
                elif typ == 'S': # Short
                    obj[name] = p(f.read(2))
                elif typ == 'J': # Long
                    obj[name] = p(f.read(8))
                elif typ == 'Z': # Bool
                    b = f.read(1)
                    assert p(b) in (0, 1)
                    obj[name] = bool(p(b))
                elif typ == 'F': # Float
                    obj[name] = h(f.read(4))
                elif typ in 'BC': # Byte, Char
                    obj[name] = f.read(1)
                elif typ in 'L[': # Object, Array
                    obj[name] = parse_obj()
                else: # Unknown
                    assert False, (name, typ, fcls)
            if cls['flags'] in ('\3', '\x0C'): # SC_WRITE_METHOD, SC_BLOCKDATA
                b = f.read(1)
                if b == '\x77': # see the readObject / writeObject methods
                    block = f.read(p(f.read(1)))
                    if cls['_name'].endswith('HashMap') or cls['_name'].endswith('Hashtable'):
                        # http://javasourcecode.org/html/open-source/jdk/jdk-6u23/java/util/HashMap.java.html
                        # http://javasourcecode.org/html/open-source/jdk/jdk-6u23/java/util/Hashtable.java.html
                        assert len(block) == 8, h(block)
                        size = p(block[4:])
                        obj['data'] = [] # python doesn't allow dicts as keys
                        for i in range(size):
                            k = parse_obj()
                            v = parse_obj()
                            obj['data'].append((k, v))
                        try:
                            obj['data'] = dict(obj['data'])
                        except TypeError:
                            pass # non hashable keys
                    elif cls['_name'].endswith('HashSet'):
                        # http://javasourcecode.org/html/open-source/jdk/jdk-6u23/java/util/HashSet.java.html
                        assert len(block) == 12, h(block)
                        size = p(block[-4:])
                        obj['data'] = []
                        for i in range(size):
                            obj['data'].append(parse_obj())
                    elif cls['_name'].endswith('ArrayList'):
                        # http://javasourcecode.org/html/open-source/jdk/jdk-6u23/java/util/ArrayList.java.html
                        assert len(block) == 4, h(block)
                        obj['data'] = []
                        for i in range(obj['size']):
                            obj['data'].append(parse_obj())
                    else:
                        assert False, cls['_name']
                    b = f.read(1)
                assert b == '\x78', h(b) + ' ' + repr(f.read(50)) # TC_ENDBLOCKDATA
        handles[handle] = ('py', obj)
        return obj
    objs = []
    while 1:
        try:
            objs.append(parse_obj())
        except StopIteration:
            return objs

if __name__ == '__main__':
    import sys, json
    json.dump(parse(sys.stdin), sys.stdout, indent=2)
Run Code Online (Sandbox Code Playgroud)

  • 谢谢!它使用了一个小修复(`objs.append(parse_obj(''))`=>`objs.append(parse_obj())`).我已经编辑了你的答案来改进它,并且默认情况下使它成为stdin => stdout过滤器. (2认同)

Bri*_*new 6

Java序列化是类/数据结构的表示,并与虚拟机紧密相关.因此,很难将其转换为Python世界.

  1. 您的Java程序可以在XML中进行序列化/反序列化,这是可以翻译的.查看JAXBXStream
  2. 您是否看过在Jython中运行的可能性- Java中的Python实现


Jon*_*eet 5

Java二进制序列化实际上是为与Java一起使用而设计的.要在Python中完成它,你必须让所有相关的Java类可供检查,并适当地创建Python对象 - 这将是非常可怕和脆弱的.

您最好使用跨平台序列化格式,如Thrift,Protocol Buffers,JSON或XML.如果您无法更改Java代码中使用的序列化格式,我建议编写新的Java代码,从二进制格式反序列化,然后重新序列化为跨平台格式.