在Haskell中高效实现Fisher精确测试

Seb*_*ila 1 haskell functional-programming

我试图在Haskell中实现Fisher的精确测试,因此给出四个自然数a,b,c和d,我想计算公式:

p =((a + b)!*(a + c)!*(b + d)!*(c + d)!)/(a!*b!*c!*d!*(a + b +) C + d)!)

我尝试了3种实现,但需要更高效的实现:

解决方案1:

module Main where

import Data.Ratio

factori n = fact_acc n 1

fact_acc 0 a = a
fact_acc n a = fact_acc (n-1) $! (n*a)

a = 1
b = 9
c = 7
d = 3

n1 = (factori (a+b)) `div` (factori a)
n2 = (factori (a+c)) `div` (factori c)
n3 = (factori (b+d)) `div` (factori b)
n4 = (factori (c+d)) `div` (factori d)
numer = n1 * n2 * n3 * n4
denom = factori (a+b+c+d)
p = (fromIntegral numer) / (fromIntegral denom)

main = do
    print denom 
    print p
Run Code Online (Sandbox Code Playgroud)

解决方案2(对不起排长队):

module Main where

factori n = fact_acc n 1
fact_acc 0 a = a
fact_acc n a = fact_acc (n-1) $! (n*a)

mul_from_to m n = mul_acc m n 1
mul_acc m n a = if (m==n) then (n*a) else mul_acc (m+1) n $! (m*a)

compute_p a b c d
     | ((a+b)>(a+c) && (a+b)>(b+d) && (a+b)>(c+d) && a<b && a<c && a<d) = fromRational (fromIntegral ((mul_from_to (c+1) (a+c)) * (mul_from_to (b+1) (b+d)) * (mul_from_to (d+1) (c+d))) / fromIntegral ((factori a) * (mul_from_to (a+b+1) (a+b+c+d))))
     | ((a+b)>(a+c) && (a+b)>(b+d) && (a+b)>(c+d) && b<c && b<d)        = fromRational (fromIntegral ((mul_from_to (a+1) (a+c)) * (mul_from_to (d+1) (b+d)) * (mul_from_to (c+1) (c+d))) / fromIntegral ((factori b) * (mul_from_to (a+b+1) (a+b+c+d))))
     | ((a+b)>(a+c) && (a+b)>(b+d) && (a+b)>(c+d) && c<d)               = fromRational (fromIntegral ((mul_from_to (a+1) (a+c)) * (mul_from_to (b+1) (b+d)) * (mul_from_to (d+1) (c+d))) / fromIntegral ((factori c) * (mul_from_to (a+b+1) (a+b+c+d))))
     | ((a+b)>(a+c) && (a+b)>(b+d) && (a+b)>(c+d))                      = fromRational (fromIntegral ((mul_from_to (a+1) (a+c)) * (mul_from_to (b+1) (b+d)) * (mul_from_to (c+1) (c+d))) / fromIntegral ((factori d) * (mul_from_to (a+b+1) (a+b+c+d))))
     | ((a+c)>(b+d) && (a+c)>(c+d) && a<b && a<c && a<d)                = fromRational (fromIntegral ((mul_from_to (b+1) (a+b)) * (mul_from_to (d+1) (b+d)) * (mul_from_to (c+1) (c+d))) / fromIntegral ((factori a) * (mul_from_to (a+c+1) (a+b+c+d))))
     | ((a+c)>(b+d) && (a+c)>(c+d) && b<c && b<d)                       = fromRational (fromIntegral ((mul_from_to (a+1) (a+b)) * (mul_from_to (d+1) (b+d)) * (mul_from_to (c+1) (c+d))) / fromIntegral ((factori b) * (mul_from_to (a+c+1) (a+b+c+d))))
     | ((a+c)>(b+d) && (a+c)>(c+d) && c<d)                              = fromRational (fromIntegral ((mul_from_to (a+1) (a+b)) * (mul_from_to (b+1) (b+d)) * (mul_from_to (d+1) (c+d))) / fromIntegral ((factori c) * (mul_from_to (a+c+1) (a+b+c+d))))
     | ((a+c)>(b+d) && (a+c)>(c+d))                                     = fromRational (fromIntegral ((mul_from_to (a+1) (a+b)) * (mul_from_to (b+1) (b+d)) * (mul_from_to (c+1) (c+d))) / fromIntegral ((factori d) * (mul_from_to (a+c+1) (a+b+c+d))))
     | ((b+d)>(c+d) && a<b && a<c && a<d)                               = fromRational (fromIntegral ((mul_from_to (b+1) (a+b)) * (mul_from_to (c+1) (a+c)) * (mul_from_to (d+1) (c+d))) / fromIntegral ((factori a) * (mul_from_to (b+d+1) (a+b+c+d))))
     | ((b+d)>(c+d) && b<c && b<d)                                      = fromRational (fromIntegral ((mul_from_to (a+1) (a+b)) * (mul_from_to (c+1) (a+c)) * (mul_from_to (d+1) (c+d))) / fromIntegral ((factori b) * (mul_from_to (b+d+1) (a+b+c+d))))
     | ((b+d)>(c+d) && c<d)                                             = fromRational (fromIntegral ((mul_from_to (b+1) (a+b)) * (mul_from_to (a+1) (a+c)) * (mul_from_to (d+1) (c+d))) / fromIntegral ((factori c) * (mul_from_to (b+d+1) (a+b+c+d))))
     | ((b+d)>(c+d))                                                    = fromRational (fromIntegral ((mul_from_to (b+1) (a+b)) * (mul_from_to (a+1) (a+c)) * (mul_from_to (c+1) (c+d))) / fromIntegral ((factori d) * (mul_from_to (b+d+1) (a+b+c+d))))
     | (a<b && a<c && a<d)                                              = fromRational (fromIntegral ((mul_from_to (b+1) (a+b)) * (mul_from_to (c+1) (a+c)) * (mul_from_to (d+1) (b+d))) / fromIntegral ((factori a) * (mul_from_to (c+d+1) (a+b+c+d))))
     | (b<c && b<d)                                                     = fromRational (fromIntegral ((mul_from_to (a+1) (a+b)) * (mul_from_to (c+1) (a+c)) * (mul_from_to (d+1) (b+d))) / fromIntegral ((factori b) * (mul_from_to (c+d+1) (a+b+c+d))))
     | (c<d)                                                            = fromRational (fromIntegral ((mul_from_to (b+1) (a+b)) * (mul_from_to (a+1) (a+c)) * (mul_from_to (d+1) (b+d))) / fromIntegral ((factori c) * (mul_from_to (c+d+1) (a+b+c+d))))
     | otherwise                                                        = fromRational (fromIntegral ((mul_from_to (a+1) (a+b)) * (mul_from_to (c+1) (a+c)) * (mul_from_to (b+1) (b+d))) / fromIntegral ((factori d) * (mul_from_to (c+d+1) (a+b+c+d))))

a = 50000
b = 910
c = 11
d = 300

p = compute_p a b c d

main = do
  print p
Run Code Online (Sandbox Code Playgroud)

解决方案3:

module Main where

import Data.Ratio

factorial n = factorials !! pred n
factorials = scanl1 (\acc x -> acc * x) [1..maxim]

a = 1
b = 9
c = 7
d = 3

maxim=a+b+c+d

n1 = (factorial (a+b)) `div` (factorial a)
n2 = (factorial (a+c)) `div` (factorial c)
n3 = (factorial (b+d)) `div` (factorial b)
n4 = (factorial (c+d)) `div` (factorial d)

numer = n1 * n2 * n3 * n4
denom = factorial (a+b+c+d)

p = (fromIntegral numer) / (fromIntegral denom)

main = do
  print denom
  print p
Run Code Online (Sandbox Code Playgroud)

Dan*_*ner 6

你计算

factorial (a+b) `div` factorial a
Run Code Online (Sandbox Code Playgroud)

几次,具有不同的ab.这可以通过仅之间相乘的数量来改善aa+b; 这减少了乘法的总数并避免完全划分,所以应该帮助一些.

根据比例,树折而不是严格的左折可以提高进行大量乘法的性能(因为大约相同幅度的乘法数比乘以一个大数和一个小数更有效).像这样的东西:

foldb' :: (a -> a -> a) -> a -> [a] -> a
foldb' f z = go where
    go [] = z
    go [v] = v
    go long = go (adjacent long)

    adjacent (x:y:rest) = let !h = f x y in h : adjacent rest
    adjacent short = short
Run Code Online (Sandbox Code Playgroud)

然后foldb' (*) 1,您可以比显式递归更快地用于计算产品.

不过,我认为这两项改进相当微不足道.他们肯定不是渐近的改进.(更新:在我的测试中,使用树形折叠实际上是一个相当大的胜利:factorial 100000需要943ms foldl',18ms foldb',50倍加速.)

  • 这被称为**Pochhammer函数**或**上升阶乘**函数.排序并选择最大的两个元素来减少(x1 + x2)!/(x1 + x2 + x3 + x4)!获得最大收益. (4认同)