如何在F#中实现"高效广义折叠"?

pri*_*tor 4 polymorphism f# nested fold recursive-type

在 Martin 等人的论文中,我读到了针对嵌套(nested)数据类型的高效广义折叠。该论文使用的是 Haskell,我想在 F# 中尝试一下。

到目前为止,我已经跟着论文中的 Nest 示例做了下来,包括实现 gfold。

/// A homogeneous pair: two values of the same type 'a.
type Pair<'a> = 'a * 'a
/// A nested (non-regular) list: each `Cons` carries one element of type 'a
/// and a tail whose elements are pairs of 'a.
type Nest<'a> = Nil | Cons of 'a * Nest<Pair<'a>>

/// Sample nest with three levels: an int, a pair of ints, and a pair of pairs of ints.
let example =
    let innermost = Cons(((4, 5), (6, 7)), Nil)
    let middle = Cons((2, 3), innermost)
    Cons(1, middle)

/// Maps `f` over both components of a pair (first component first).
let pair (f:'a -> 'b) (p:Pair<'a>) : Pair<'b> =
    let (first, second) = p
    f first, f second

/// Maps `f` over every element of a nest. The recursive call works on a tail of
/// pairs, so `f` is lifted with `pair` at each level (polymorphic recursion,
/// hence the explicit type parameters).
let rec nest<'a, 'r> (f:'a -> 'r) : Nest<'a> -> Nest<'r> =
    fun source ->
        match source with
        | Nil -> Nil
        | Cons(head, rest) -> Cons(f head, nest (pair f) rest)

//val gfold : e:'r -> f:('a * 'r -> 'r) -> g:(Pair<'a> -> 'a) -> _arg1:Nest<'a> -> 'r
/// Generalised fold over a nest: `e` is the result for `Nil`, `f` combines the
/// head with the folded tail, and `g` collapses each pair in the tail back to a
/// single 'a. Note that the whole remaining tail is re-mapped with `g` at every step.
let rec gfold e f g : Nest<'a> -> 'r =
    fun source ->
        match source with
        | Nil -> e
        | Cons(head, rest) ->
            f (head, gfold e f g (nest g rest))

/// Turns a curried two-argument function into one that takes a tuple.
let uncurry f args =
    let (first, second) = args
    f first second

/// Adds the two components of a tuple.
let up = uncurry (+)

/// Sum of all numbers in `example`: `up` both collapses pairs and accumulates the total.
let sum = gfold 0 up up example
Run Code Online (Sandbox Code Playgroud)

不幸的是,gfold 似乎具有二次复杂度,这正是作者提出 efold 的原因。你可能已经猜到,我没能让 efold 正常工作。在反复调整了许多类型注解之后,我得到了下面这个版本,只剩下一处小小的波浪线(编译错误):

// Attempted efficient fold. As written it does NOT type-check: with `h` left as `_`,
// the recursive call on `xs` is rejected because `xs` is a Nest<Pair<'a>> where a
// Nest<'a> is expected (see the error message quoted below).
let rec efold<'a, 'b, 'r> (e:'r) (f:'a * 'r -> 'r) (g:(Pair<'a> -> Pair<'a>) -> 'a -> 'a) (h:_) (nest:Nest<'a>) : 'r =
    match nest with
    | Nil -> e
    | Cons(x, xs) -> f(h x, efold e f g ((g << pair) h) xs)
                                                        ^^
Run Code Online (Sandbox Code Playgroud)

唯一尚未指定的类型是 h 的类型。编译器推断出 val h : ('a -> 'a),但我认为它应该是另一种类型。

给出的错误消息如下:

错误:类型不匹配。期望得到
Nest<'a>
但实际得到的是
Nest<Pair<'a>>
在统一 ''a' 和 'Pair<'a>' 时,结果类型将是无限的

只要 h 的类型正确,这个错误应该就会消失。但我对 Haskell 的了解还不足以把它翻译成 F#。

另请参阅关于该论文中可能存在笔误的讨论。


更新:这是我从kvb的回答中理解的:

因此,h 将输入类型转换为中间类型,就像在常规折叠中累加器可以是另一种类型一样。g 随后用于把两个中间类型的值归约为一个,而 f 接收中间类型和输入类型的值,并生成输出类型的值。当然,e 也属于该输出类型。

h 确实直接应用于递归过程中遇到的值。另一方面,g 仅用于使 h 适用于嵌套层次逐渐加深的类型。

单看第一个示例中的 f,除了应用 h 并推动递归之外,它似乎没有做太多工作。但在复杂的方法中,我可以看出,就最终产出的结果而言,它是最重要的一个,也就是说,它才是真正干活的主力。

那是对的吗?

kvb*_*kvb 6

efold 在 Haskell 中的正确定义如下:

-- | Efficient generalised fold over 'Nest'.
--
--   * @e@ is the result for 'Nil'.
--   * @f@ combines the mapped head (@h x@) with the fold of the tail.
--   * @g@ turns a pair of @m@-values into an @m@ of a pair; it is used to
--     lift @h@ to the pair type of the tail (@g . pair h@).
--   * @h@ maps an element to its @m@ representation.
efold :: forall n m b.
    (forall a. n a)->
    (forall a.(m a, n (Pair a)) -> n a)->
    (forall a.Pair (m a) -> m (Pair a))->
    (forall a.(a -> m b) -> Nest a -> n b)
efold e f g h Nil = e
-- The original line was missing the final closing parenthesis of the call to @f@.
efold e f g h (Cons (x,xs)) = f (h x, efold e f g (g . pair h) xs)
Run Code Online (Sandbox Code Playgroud)

这无法以完全通用的方式翻译为 F#,因为 n 和 m 是"高阶类型"(higher-kinded types),即给定参数后才产生类型的类型构造函数,而 F# 不支持它们(并且在 .NET 中也没有简洁的表示形式)。

解释

你在更新中问到如何解释折叠的各个参数。要了解折叠如何工作,最简单的方法可能是把折叠应用于示例时发生的事情展开来看。你会得到类似这样的结果:

efold e f g h example ≡
    f (h 1, f ((g << pair h) (2, 3), f ((g << pair (g << pair h)) ((4,5), (6,7)), e)))
Run Code Online (Sandbox Code Playgroud)

因此,h 将值映射为可以作为 f 第一个参数的类型。g 用于把 h 应用到嵌套层次更深的对上(这样我们就可以把 h 从类型为 a -> m b 的函数,变为 Pair a -> m (Pair b)、Pair (Pair a) -> m (Pair (Pair b)) 等类型的函数),而 f 沿着脊柱(spine)被反复应用,把 h 的结果与对 f 的嵌套调用的结果组合起来。最后,e 恰好被使用一次,作为对 f 的最深层嵌套调用的种子。

我认为这种解释与你的推断基本一致。f 对于组合不同层的结果肯定是至关重要的。但 g 也很重要,因为它告诉你如何在同一层内组合各个部分(例如,在对节点求和时,它需要把左右两个嵌套的和加起来;如果你想用折叠构建一个新的嵌套值,使每一层的值与输入相比顺序颠倒,那么你会使用一个大致形如 fun (a,b) -> b,a 的 g)。

简单的方法

一种选择是为你关心的每一对 n、m 建立专门的 efold 实现。举例来说,如果我们要对 Nest 中所含列表的长度求和,那么 n _ 和 m _ 都只是 int。我们可以稍微推广一下,推广到 n _ 和 m _ 不依赖于其参数的情形:

/// Efficient fold specialised to the case where the result types 'n and 'm do not
/// depend on the element type: `e` is the result for `Nil`, `f` combines the mapped
/// head with the folded tail, `g` collapses a pair of 'm into one 'm, and `h` maps
/// an element to 'm. For the tail, `h` is lifted to pairs as `pair h >> g`.
let rec efold<'n,'m,'a> (e:'n) (f:'m*'n->'n) (g:Pair<'m> -> 'm) (h:'a->'m) : Nest<'a> -> 'n =
    fun source ->
        match source with
        | Nil -> e
        | Cons(head, rest) -> f (h head, efold e f g (pair h >> g) rest)

/// Sum of all numbers in `example`, computed with the specialised `efold`.
let total = example |> efold 0 up up id
Run Code Online (Sandbox Code Playgroud)

另一方面,如果 n 和 m 确实使用了它们的参数,那么你需要定义一个单独的特化版本(此外,你可能需要为每个多态参数创建新的类型,因为 F# 对高秩类型(higher-rank types)的编码比较笨拙)。例如,要把嵌套结构中的值收集到一个列表中,你需要 n 'a = list<'a> 且 m 'b = 'b。这时我们不必为 e 的参数类型定义新类型,因为可以注意到类型 forall 'a. list<'a> 的唯一值就是 [],所以我们可以这样写:

/// Polymorphic combining step for the list-collecting fold: takes a head value
/// and the list of pairs produced for the tail, and returns a list of values.
type ListIdF =
    abstract Apply : 'a * list<Pair<'a>> -> list<'a>

/// Polymorphic pair transformation for the list-collecting fold: maps a pair to a pair
/// of the same element type.
type ListIdG =
    abstract Apply : Pair<'a> -> Pair<'a>

/// Efficient fold specialised to produce a list: `Nil` yields the empty list,
/// `f` combines the mapped head with the list built from the tail, and `g` is
/// applied after `pair h` to lift `h` to the pair type of the tail.
let rec efold<'a,'b> (f:ListIdF) (g:ListIdG) (h:'a -> 'b) : Nest<'a> -> list<'b> =
    fun source ->
        match source with
        | Nil -> []
        | Cons(head, rest) -> f.Apply(h head, efold f g (g.Apply << pair h) rest)

/// Collects the values of a nest into a flat list: each level's head is consed
/// onto the flattened pairs of the tail; pairs are kept unchanged.
let toList n =
    let consFlattened =
        { new ListIdF with
            member __.Apply(a, l) = a :: (List.collect (fun (x, y) -> [x; y]) l) }
    let keepPair =
        { new ListIdG with
            member __.Apply(p) = p }
    efold consFlattened keepPair id n
Run Code Online (Sandbox Code Playgroud)

复杂的方法

虽然 F# 并不直接支持高阶类型(higher-kinded types),但事实证明可以用一种相当忠实的方式来模拟它们。这正是 Higher 库所采用的方法。下面是一个最小版本的样子。

我们创建一个类型 App<'T,'a>,用来表示某个类型应用 T<'a>;同时我们会创建一个虚拟的伴随类型,用作 App<_,_> 的第一个类型参数:

/// Encodes a type application: 'F is a companion ("brand") type standing for a
/// type constructor, 'T is its argument. The wrapped value is stored as `obj`
/// and can only be read back by presenting the same token instance.
type App<'F, 'T>(token : 'F, value : obj) =
    do
        // Reject a token that is reference-equal to the default value of 'F.
        if obj.ReferenceEquals(token, Unchecked.defaultof<'F>) then
            invalidOp "Invalid token"

    /// Returns the encapsulated value when `token'` is the very same instance
    /// that was supplied at construction; otherwise raises InvalidOperationException.
    member self.Apply(token' : 'F) : obj =
        if obj.ReferenceEquals(token, token') then
            value
        else
            invalidOp "Invalid token"
Run Code Online (Sandbox Code Playgroud)

现在我们可以为我们关心的类型构造函数定义一些伴随类型(这些类型构造函数通常可以存在于某个共享库中):

// App<Const<'a>, 'b> represents a value of type 'a (that is, ignores 'b)
/// Companion type for the constant type constructor. The constructor is private,
/// so the token used to build and open `App` values is created only here.
type Const<'a> private () =
    static let token = Const ()

    /// Wraps a plain 'a as an App<Const<'a>, 'b>.
    static member Inj (value : 'a) =
        App<Const<'a>, 'b>(token, value)

    /// Unwraps an App<Const<'a>, 'b> back to the underlying 'a.
    static member Prj (app : App<Const<'a>, 'b>) : 'a =
        let boxed = app.Apply(token)
        downcast boxed

// App<List, 'a> represents list<'a>
/// Companion type for the list type constructor. The constructor is private,
/// so the token used to build and open `App` values is created only here.
type List private () =
    static let token = List()

    /// Wraps an 'a list as an App<List, 'a>.
    static member Inj (value : 'a list) =
        App<List, 'a>(token, value)

    /// Unwraps an App<List, 'a> back to the underlying 'a list.
    static member Prj (app : App<List, 'a>) : 'a list =
        let boxed = app.Apply(token)
        downcast boxed

// App<Id, 'a> represents just a plain 'a
/// Companion type for the identity type constructor. The constructor is private,
/// so the token used to build and open `App` values is created only here.
type Id private () =
    static let token = Id()

    /// Wraps a plain 'a as an App<Id, 'a>.
    static member Inj (value : 'a) =
        App<Id, 'a>(token, value)

    /// Unwraps an App<Id, 'a> back to the underlying 'a.
    static member Prj (app : App<Id, 'a>) : 'a =
        let boxed = app.Apply(token)
        downcast boxed

// App<Nest, 'a> represents a Nest<'a>
/// Companion type for the Nest type constructor. The constructor is private,
/// so the token used to build and open `App` values is created only here.
type Nest private () =
    static let token = Nest()

    /// Wraps a Nest<'a> as an App<Nest, 'a>.
    static member Inj (value : Nest<'a>) =
        App<Nest, 'a>(token, value)

    /// Unwraps an App<Nest, 'a> back to the underlying Nest<'a>.
    static member Prj (app : App<Nest, 'a>) : Nest<'a> =
        let boxed = app.Apply(token)
        downcast boxed
Run Code Online (Sandbox Code Playgroud)

现在我们可以一次性为有效折叠的参数定义更高级别的类型:

// forall a. n a
/// Polymorphic seed value of the fold: produces an App<'N,'a> for any 'a.
type E<'N> =
    abstract Apply<'a> : unit -> App<'N,'a>

// forall a. (m a, n (Pair a)) -> n a
/// Polymorphic combining step of the fold: takes the mapped head (App<'M,'a>) and
/// the result for the tail of pairs (App<'N,'a*'a>) and returns an App<'N,'a>.
type F<'M,'N> =
    abstract Apply<'a> : App<'M,'a> * App<'N,'a*'a> -> App<'N,'a>

// forall a. Pair (m a) -> m (Pair a)
/// Polymorphic pair step of the fold: turns two App<'M,'a> values into a single
/// App<'M,'a*'a>.
type G<'M> =
    abstract Apply<'a> : App<'M,'a> * App<'M,'a> -> App<'M,'a*'a>
Run Code Online (Sandbox Code Playgroud)

所以折叠只是:

/// Efficient generalised fold using the `App` encoding: `e` supplies the result
/// for `Nil`, `f` combines the mapped head with the fold of the tail, and `g` is
/// applied after `pair h` to lift `h` to the pair type of the tail.
let rec efold<'N,'M,'a,'b> (e:E<'N>) (f:F<'M,'N>) (g:G<'M>) (h:'a -> App<'M,'b>) : Nest<'a> -> App<'N,'b> =
    fun source ->
        match source with
        | Nil -> e.Apply()
        | Cons(head, rest) -> f.Apply(h head, efold e f g (pair h >> g.Apply) rest)
Run Code Online (Sandbox Code Playgroud)

现在要调用 efold,我们需要在各处穿插对各种 Inj 和 Prj 方法的调用,但除此之外,代码看起来与我们预期的一样:

/// Collects the values of a nest into a flat list ('N = List, 'M = Id): each
/// head is consed onto the flattened pairs of the tail; pairs are kept unchanged.
let toList n =
    let emptyList =
        { new E<_> with
            member __.Apply() = List.Inj [] }
    let consFlattened =
        { new F<_,_> with
            member __.Apply(m, n) =
                let head = Id.Prj m
                let flattenedTail = List.Prj n |> List.collect (fun (x, y) -> [x; y])
                List.Inj (head :: flattenedTail) }
    let keepPair =
        { new G<_> with
            member __.Apply(m1, m2) =
                let paired = (Id.Prj m1, Id.Prj m2)
                Id.Inj paired }
    List.Prj (efold emptyList consFlattened keepPair Id.Inj n)

/// Sums all values in a nest ('N and 'M are both Const): the seed is 0, `f` adds
/// the head to the tail's total, and `g` adds the two halves of a pair.
let sumElements n =
    let folded =
        efold
            { new E<_> with
                member __.Apply() = Const.Inj 0 }
            { new F<_,_> with
                member __.Apply(m, n) =
                    let total = Const.Prj m + Const.Prj n
                    Const.Inj total }
            { new G<_> with
                member __.Apply(m1, m2) =
                    let pairTotal = Const.Prj m1 + Const.Prj m2
                    Const.Inj pairTotal }
            Const.Inj
            n
    Const.Prj folded

/// Rebuilds a nest in which the two components of every pair, at every level,
/// are swapped relative to the input ('N = Nest, 'M = Id).
let reverse n =
    efold { new E<_> with member __.Apply() = Nest.Inj Nil }
          { new F<_,_> with member __.Apply(m,n) = Cons(Id.Prj m, Nest.Prj n) |> Nest.Inj }
          // Swap the pair. The original projected the literal `2` instead of `m2`,
          // which does not type-check.
          { new G<_> with member __.Apply(m1,m2) = (Id.Prj m2, Id.Prj m1) |> Id.Inj }
          Id.Inj
          n
    |> Nest.Prj
Run Code Online (Sandbox Code Playgroud)

希望这里的模式很清楚:在每个对象表达式中,Apply 方法先把每个参数投影(Prj)出来,对它们进行操作,然后再把结果注入(Inj)回一个 App<_,_> 类型。借助一些 inline 魔法,我们可以让这些代码看起来更加一致(代价是需要少量类型注解):

/// Active pattern that projects an App< ^T, 'a> by calling the static `Prj` member of ^T.
let inline (|Prj|) (app:App< ^T, 'a>) = (^T : (static member Prj : App< ^T, 'a> -> 'b) app)
/// Function form of the `Prj` pattern: returns the projected value.
let inline prj (Prj x) = x
/// Injects `x` into an App< ^T, 'a> by calling the static `Inj` member of ^T.
let inline inj x = (^T : (static member Inj : 'b -> App< ^T, 'a>) x)

/// Collects the values of a nest into a flat list, written with the `Prj` pattern
/// and the `inj`/`prj` helpers instead of explicit companion-type calls.
// NOTE(review): the E<List> / F<Id,_> annotations presumably give the inline helpers
// enough type information to resolve - confirm before removing or reordering them.
let toList n = 
    efold { new E<List> with member __.Apply() = inj [] } 
          { new F<Id,_> with member __.Apply(Prj m, Prj n) = m :: (n |> List.collect (fun (x,y) -> [x;y])) |> inj }
          { new G<_> with member __.Apply(Prj m1,Prj m2) = (m1, m2) |> inj }
          inj
          n
    |> prj

/// Sums all values in a nest, written with the `Prj` pattern and the `inj`/`prj`
/// helpers: the seed is 0, `f` adds the head to the tail's total, and `g` adds the
/// two halves of a pair.
// NOTE(review): the Const<_> annotations presumably give the inline helpers enough
// type information to resolve - confirm before removing or reordering them.
let sumElements n =
    efold { new E<Const<_>> with member __.Apply() = inj 0 }
          { new F<Const<_>,_> with member __.Apply(Prj m, Prj n) = m + n |> inj }
          { new G<_> with member __.Apply(Prj m1,Prj m2) = m1 + m2 |> inj }
          inj
          n
    |> prj

/// Rebuilds a nest in which the two components of every pair are swapped, written
/// with the `Prj` pattern and the `inj`/`prj` helpers. The seed still uses the
/// explicit `Nest.Inj Nil`.
// NOTE(review): the F<Id,_> annotation presumably gives the inline helpers enough
// type information to resolve - confirm before removing or reordering it.
let reverse n = 
    efold { new E<_> with member __.Apply() = Nest.Inj Nil }
          { new F<Id,_> with member __.Apply(Prj m,Prj n) = Cons(m, n) |> inj }
          { new G<_> with member __.Apply(Prj m1,Prj m2) = (m2, m1) |> inj }
          inj
          n
    |> prj
Run Code Online (Sandbox Code Playgroud)