如何使Julia Code更高效？目前它的性能甚至比Python差

Question

如何使Julia Code更高效？目前它的性能甚至比Python差

我是朱莉娅的新手并且一直在尝试它,因为我知道它有惊人的表现.但我仍然要体验那些承诺的表演.我已经尝试了许多方法来增强"JULIA HIGH PERFORMANCE"一书中描述的性能,这使得代码的可读性稍差.但是,我的python代码比我的Julia代码快得多,对于基准案例来说至少快3倍.要么我正在做一些非常错误的代码,这些代码必须是朱莉娅的罪,或者说朱莉娅不能做到这一点.请证明我以后的错误.

我在代码中尝试做的是将不同的球分配到不同的盒子中,每个盒子的容量有最大和最小限制.球放在盒子里的顺序也很重要.我需要在最短的时间内使用给定的约束生成所有可能的赋值.

PYTHON代码:

import itertools
import time

max_balls = 5
min_balls = 0

def get_assignments(balls, boxes, assignments=[[]]):
    all_assignments = []
    upper_ball_limit = len(balls)
    if upper_ball_limit > max_balls:
        upper_ball_limit = max_balls

    n_boxes = len(boxes)
    lower_ball_limit = len(balls) - upper_ball_limit * (n_boxes - 1)
    if lower_ball_limit < min_balls:
        lower_ball_limit = min_balls

    if len(boxes) == 0:
        raise Exception("No delivery boys found")

    elif len(boxes) == 1:
        for strategy in itertools.permutations(balls, upper_ball_limit):
            #             valid = evaluate_strategy(strategy, db_id)
            for subplan in assignments:
                subplan_copy = subplan[:]
                box_id = boxes[0]
                subplan_copy.append((box_id, strategy))
                all_assignments.append(subplan_copy)
        return all_assignments
    else:
        box_id = boxes[0]
        for i in range(lower_ball_limit, upper_ball_limit+ 1):
            for strategy in itertools.permutations(balls, i):
                temp_plans = []
                for subplan in assignments:
                    subplan_copy = subplan[:]
                    subplan_copy.append((box_id, strategy))
                    temp_plans.append(subplan_copy)
                    remaining_balls = set(balls).difference(strategy)
                    remaining_boxes = list(set(boxes).difference([box_id]))
                    if remaining_balls:
                        all_assignments.extend(get_assignments(remaining_balls, remaining_boxes, temp_plans))
                    else:
                        all_assignments.extend(temp_plans)
        return all_assignments

balls = range(1, 9)
boxes = [1, 2, 3, 4]

t = time.time()
all_assignments = get_assignments(balls, boxes)

print('Number of assignments: %s' % len(all_assignments))
print('Time taken: %s' % (time.time()-t))

Run Code Online (Sandbox Code Playgroud)

这是我尝试为上述内容编写JULIA CODE.

#!/usr/bin/env julia

using Combinatorics

const max_balls=5
const min_balls=0

function plan_assignments(balls::Vector{Int32}, boxes ; plans=[Vector{Tuple{Int32,Array{Int32,1}}}(length(boxes))])
const n_boxes = length(boxes)
const n_balls = length(balls)
const n_plans = length(plans)

if n_boxes*max_balls < n_balls
  print("Invalid Inputs: Number of balls exceed the number of boxes.")
end

all_plans = Vector{Tuple{Int32,Array{Int32,1}}}[]
upper_box_limit = n_balls
if upper_box_limit > max_balls
    upper_box_limit = max_balls
end
lower_box_limit = n_balls - upper_box_limit * (n_boxes-1)

if lower_box_limit < min_balls
    lower_box_limit = min_balls
end

if n_boxes == 1
    box_id = boxes[1]
    @inbounds for strategy in Combinatorics.permutations(balls, upper_box_limit)
        @inbounds for subplan in plans
            subplan = subplan[:]
            subplan[tn_boxes - n_boxes + 1] = (box_id, strategy)
            all_plans = push!(all_plans, subplan)
        end
    end
    return all_plans

else
    box_id = boxes[1]
    @inbounds for i in lower_box_limit:upper_box_limit
        @inbounds for strategy in Combinatorics.permutations(balls, i)
            temp_plans = Array{Vector{Tuple{Int32,Array{Int32,1}}},1}(n_plans)
            # temp_plans = []

            @inbounds for (i,subplan) in zip(1:n_plans, plans)
                subplan = subplan[:]
                subplan[tn_boxes - n_boxes + 1] = (box_id, strategy)
                temp_plans[i] = subplan
                # subplan = push!(subplan, (db_id, strategy))
                # temp_plans = push!(temp_plans, subplan)

                remaining_balls = filter((x) -> !(x in strategy), balls)
                remaining_boxes = filter((x) -> x != box_id , boxes)

                if length(remaining_balls) > 0
                  @inbounds for plan in plan_assignments(remaining_balls, remaining_boxes, plans=temp_plans)
                    push!(all_plans, plan)
                  end
                    # append!(all_plans, plan_assignments(remaining_orders, remaining_delivery_boys, plans=temp_plans))
                else
                  @inbounds for plan in temp_plans
                    push!(all_plans, plan)
                  end
                    # append!(all_plans, temp_plans)

                end
            end
        end
    end
    return all_plans
end
end

balls = Int32[1,2,3,4,5,6,7,8]
boxes = Int32[1,2,3,4]

const tn_boxes = length(boxes)

@timev all_plans = plan_assignments(balls, boxes)
print(length(all_plans))

Run Code Online (Sandbox Code Playgroud)

我的基准时间如下:

对于Python:

Number of assignments: 5040000
Time taken: 22.5003659725

Run Code Online (Sandbox Code Playgroud)

对朱莉娅:(这是折扣编译时间.)

76.986338 seconds (122.94 M allocations: 5.793 GB, 77.01% gc time)
elapsed time (ns): 76986338257
gc time (ns):      59287603693
bytes allocated:   6220111360
pool allocs:       122932049
non-pool GC allocs:10587
malloc() calls:    11
realloc() calls:   18
GC pauses:         270
full collections:  28

Run Code Online (Sandbox Code Playgroud)

Answer 1

Dan*_*etz 7

这是Julia中的另一个版本,更加惯用和修改以避免递归和一些分配:

using Iterators
using Combinatorics

histograms(n,m) = [diff([0;x;n+m]).-1 for x in combinations([1:n+m-1;],m-1)]

good_histograms(n,m,minval,maxval) = 
  [h for h in histograms(n,m) if all(maxval.>=h.>=minval)]

typealias PlanGrid Matrix{SubArray{Int,1,Array{Int,1},Tuple{UnitRange{Int}},true}}

function assignmentplans(balls,boxes,minballs,maxballs)
  nballs, nboxes = length(balls),length(boxes)
  nperms = factorial(nballs)
  partslist = good_histograms(nballs,nboxes,minballs,maxballs)
  plans = PlanGrid(nboxes,nperms*length(partslist))
  permutationvector = vec([balls[p[i]] for i=1:nballs,p in permutations(balls)])
  i1 = 1
  for parts in partslist
    i2 = 0
    breaks = [(x[1]+1:x[2]) for x in partition(cumsum([0;parts]),2,1)]
    for i=1:nperms
      for j=1:nboxes
        plans[j,i1] = view(permutationvector,breaks[j]+i2)
      end
      i1 += 1
      i2 += nballs
    end
  end
  return plans
end

Run Code Online (Sandbox Code Playgroud)

为了计时,我们得到:

julia> assignmentplans([1,2],[1,2],0,2)   # a simple example
2×6 Array{SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true},2}:
 Int64[]  Int64[]  [1]  [2]  [1,2]    [2,1]  
 [1,2]    [2,1]    [2]  [1]  Int64[]  Int64[]

julia> @time plans = assignmentplans([1:8;],[1:4;],0,5);
  8.279623 seconds (82.28 M allocations: 2.618 GB, 14.07% gc time)

julia> size(plans)
(4,5040000)

julia> plans[:,1000000]                    # sample ball configuration
4-element Array{SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true},1}:
 Int64[]    
 [7,3,8,2,5]
 [4]        
 [6,1]

Run Code Online (Sandbox Code Playgroud)

当然,时间安排因设置而异,但这应该快得多.它不完全是苹果与苹果的比较,但它计算相同的东西.在评论中欢迎时间在海报(或其他人)的机器上.

@manofsins是的,我认为其他的看法是语言之间的翻译应该寻找惯用的等价物而不是文字转换,并且改进算法可以产生比试图加速低效算法更多的好处.(我仍然认为你原来的版本做了太多的任务,但......) (5认同)
"如果以正确的方式写作,朱莉娅才会很快",是的,不是.*Bad*代码将在*any*语言中遇到瓶颈,并且所有语言都存在有效的,非标准的,语言特定的优化.有一个着名的博客文章'beats'julia使用python与许多奥术蟒蛇黑客.朱莉娅的主张是*标准*,与*standard*python/matlab代码(无论是否为矢量化)相比,julia中的非实现代码更快(事实上,推荐!).但是,标准的良好实践仍然很重要(例如避免昂贵的重新分配),不会,故意低效的代码不会神奇地"加速". (5认同)
新代码在我的电脑上需要7秒.在编写了`permutations`,`combination`和`partition`的类型稳定版本,并在代码中为一些理解添加了一些显式类型之后,我可以获得1.5 s的运行时间,大约50%gc时间.粘贴这么多行代码可以吗？也许,我应该打开一个或两个拉取请求. (2认同)

Answer 2

tim*_*tim 5

我对Dan Getz的代码做了一些小改动,以使其类型稳定.主要的问题是,Combinatorics.permutations并且Iterators.partition不是类型稳定的,所以我不得不写这些类型稳定版本(我的更改Combinatorics.combinations实际上是不必要的)

import Base: start, next, done, eltype, length, size
import Base: iteratorsize, SizeUnknown, IsInfinite, HasLength

import Combinatorics: factorial, combinations

# Copied from Combinatorics
# Only one small change in the `nextpermutation` method

immutable Permutations{T}
    a::T
    t::Int
end

eltype{T}(::Type{Permutations{T}}) = Vector{eltype(T)}

length(p::Permutations) = (0 <= p.t <= length(p.a))?factorial(length(p.a), length(p.a)-p.t):0

"""
Generate all permutations of an indexable object. Because the number of permutations can be very large, this function returns an iterator object. Use `collec
t(permutations(array))` to get an array of all permutations.
"""
permutations(a) = Permutations(a, length(a))

"""
Generate all size t permutations of an indexable object.
"""
function permutations(a, t::Integer)
    if t < 0
        t = length(a) + 1
    end
    Permutations(a, t)
end

start(p::Permutations) = [1:length(p.a);]
next(p::Permutations, s) = nextpermutation(p.a, p.t ,s)

function nextpermutation(m, t, state)
    s = copy(state)                     # was s = copy(s) a few lines down
    perm = [m[s[i]] for i in 1:t]
    n = length(s)
    if t <= 0
        return(perm, [n+1])
    end
    if t < n
        j = t + 1
        while j <= n &&  s[t] >= s[j]; j+=1; end
    end
    if t < n && j <= n
        s[t], s[j] = s[j], s[t]
    else
        if t < n
            reverse!(s, t+1)
        end
        i = t - 1
        while i>=1 && s[i] >= s[i+1]; i -= 1; end
        if i > 0
            j = n
            while j>i && s[i] >= s[j]; j -= 1; end
            s[i], s[j] = s[j], s[i]
            reverse!(s, i+1)
        else
            s[1] = n+1
        end
    end
    return(perm, s)
end

done(p::Permutations, s) = !isempty(s) && max(s[1], p.t) > length(p.a) ||  (isempty(s) && p.t > 0)  

# Copied from Iterators
# Returns an `Array` of `Array`s instead of `Tuple`s now

immutable Partition{I}
    xs::I
    step::Int
    n::Int

end

iteratorsize{T<:Partition}(::Type{T}) = SizeUnknown()

eltype{I}(::Type{Partition{I}}) = Vector{eltype(I)}

function partition{I}(xs::I, n::Int, step::Int = n)
    Partition(xs, step, n)
end

function start{I}(it::Partition{I})
    N = it.n
    p = Vector{eltype(I)}(N)
    s = start(it.xs)
    for i in 1:(N - 1)
        if done(it.xs, s)
            break
        end
        (p[i], s) = next(it.xs, s)
    end
    (s, p)
end

function next{I}(it::Partition{I}, state)
    N = it.n
    (s, p0) = state
    (x, s) = next(it.xs, s)
    ans = p0; ans[end] = x

    p = similar(p0)
    overlap = max(0, N - it.step)
    for i in 1:overlap
        p[i] = ans[it.step + i]
    end

    # when step > n, skip over some elements
    for i in 1:max(0, it.step - N)
        if done(it.xs, s)
            break
        end
        (x, s) = next(it.xs, s)
    end

    for i in (overlap + 1):(N - 1)
        if done(it.xs, s)
            break
        end

        (x, s) = next(it.xs, s)
        p[i] = x
    end

    (ans, (s, p))
end

done(it::Partition, state) = done(it.xs, state[1])

# Copied from the answer of Dan Getz
# Added types to comprehensions and used Vector{Int} instead of Int in vcat    

typealias PlanGrid Matrix{SubArray{Int,1,Array{Int,1},Tuple{UnitRange{Int}},true}}

histograms(n,m) = [diff(vcat([0],x,[n+m])).-1 for x in combinations([1:n+m-1;],m-1)]

good_histograms(n,m,minval,maxval) = 
  Vector{Int}[h for h in histograms(n,m) if all(maxval.>=h.>=minval)]

minballs = 0
maxballs = 5

function assignmentplans(balls,boxes,minballs,maxballs)
  nballs, nboxes = length(balls),length(boxes)
  nperms = factorial(nballs)
  partslist = good_histograms(nballs,nboxes,minballs,maxballs)
  plans = PlanGrid(nboxes,nperms*length(partslist))
  permutationvector = vec([balls[p[i]] for i=1:nballs,p in permutations(balls)])
  i1 = 1
  for parts in partslist
    i2 = 0
    breaks = UnitRange{Int64}[(x[1]+1:x[2]) for x in partition(cumsum(vcat([0],parts)),2,1)]
    for i=1:nperms
      for j=1:nboxes
        plans[j,i1] = view(permutationvector,breaks[j]+i2)
      end
      i1 += 1
      i2 += nballs
    end
  end
  return plans
end

@time plans = assignmentplans(1:8, 1:4, 0, 5)

Run Code Online (Sandbox Code Playgroud)

第一次运行的结果是(由于gc,时间变化很大)

  1.589867 seconds (22.02 M allocations: 1.127 GB, 46.50% gc time)
4×5040000 Array{SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true},2}:
 Int64[]      Int64[]      Int64[]      Int64[]      Int64[]      Int64[]      …  [8,7,6,5,4]  [8,7,6,5,4]  [8,7,6,5,4]  [8,7,6,5,4]  [8,7,6,5,4]
 Int64[]      Int64[]      Int64[]      Int64[]      Int64[]      Int64[]         [1,3,2]      [2,1,3]      [2,3,1]      [3,1,2]      [3,2,1]    
 [1,2,3]      [1,2,3]      [1,2,3]      [1,2,3]      [1,2,3]      [1,2,3]         Int64[]      Int64[]      Int64[]      Int64[]      Int64[]    
 [4,5,6,7,8]  [4,5,6,8,7]  [4,5,7,6,8]  [4,5,7,8,6]  [4,5,8,6,7]  [4,5,8,7,6]     Int64[]      Int64[]      Int64[]      Int64[]      Int64[]

Run Code Online (Sandbox Code Playgroud)

我没有彻底测试这些变化.另外,我不明白,为什么s = copy(s)工作combinations但不在permutations.有趣的是,如果我尝试使原始版本类型稳定(仍然> 40 s,85%gc时间),那么改进只有微不足道的改进.

归档时间：	9 年，1 月前
查看次数：	459 次
最近记录：	9 年，1 月前