我想编写一段代码,对任意数列中连续同号(正或负)的数字进行计数并求累计和。
数字为正数或负数(无零)。
我用 for 循环编写了代码。有没有更有创意的替代方案?
set.seed(100)
x <- round(rnorm(20, sd = 0.02), 3)
Run Code Online (Sandbox Code Playgroud)
x = [-0.01, 0.003, -0.002, 0.018, 0.002, 0.006, -0.012, 0.014, -0.017, -0.007,
0.002, 0.002, -0.004, 0.015, 0.002, -0.001, -0.008, 0.01, -0.018, 0.046]
Run Code Online (Sandbox Code Playgroud)
# Count consecutive same-sign values in `x` and accumulate their running sum.
# The length is taken from `x` instead of being hard-coded to 20, so the same
# code works for a series of any length (including an empty one).
len <- length(x)

# +1 for positive values, -1 for negative ones (zeros are assumed absent).
sign_indicator <- ifelse(x > 0, 1, -1)

# number_of_sequence[i] is the position of x[i] inside its same-sign run.
number_of_sequence <- rep(NA_real_, len)
n <- 1
for (i in seq_len(len)) {
  if (i > 1 && sign_indicator[i] == sign_indicator[i - 1]) {
    n <- n + 1
  } else {
    n <- 1
  }
  number_of_sequence[i] <- n
}
#############################
# summation[i] is the sum of the current run up to and including x[i].
summation <- rep(NA_real_, len)
for (i in seq_len(len)) {
  run_start <- i + 1 - number_of_sequence[i]
  summation[i] <- sum(x[i:run_start])
}
Run Code Online (Sandbox Code Playgroud)
# Sign of every element: 1 for positive, -1 for negative (no zeros expected).
# The data list is named `x` (lower case), so it is referenced as `x` here.
sign_indicator = [1 if value > 0 else -1 for value in x]

# number_of_sequence[i] is the 1-based position of x[i] within its sign run.
number_of_sequence = [1]
N = 1
for i in range(1, len(sign_indicator)):
    if sign_indicator[i] == sign_indicator[i - 1]:
        N += 1
    else:
        N = 1
    number_of_sequence.append(N)
#############################
# summation[i] is the sum of the current run up to and including x[i].
summation = []
for i in range(len(x)):
    if number_of_sequence[i] == 1:
        summation.append(x[i])
    else:
        summation.append(sum(x[(i + 1 - number_of_sequence[i]):(i + 1)]))
Run Code Online (Sandbox Code Playgroud)
x n_of_sequence sum
1 -0.010 1 -0.010
2 0.003 1 0.003
3 -0.002 1 -0.002
4 0.018 1 0.018
5 0.002 2 0.020
6 0.006 3 0.026
7 -0.012 1 -0.012
8 0.014 1 0.014
9 -0.017 1 -0.017
10 -0.007 2 -0.024
11 0.002 1 0.002
12 0.002 2 0.004
13 -0.004 1 -0.004
14 0.015 1 0.015
15 0.002 2 0.017
16 -0.001 1 -0.001
17 -0.008 2 -0.009
18 0.010 1 0.010
19 -0.018 1 -0.018
20 0.046 1 0.046
Run Code Online (Sandbox Code Playgroud)
bom*_*mbs 17
其他解决方案看起来没问题,但对于这个简单的问题,您实际上并不需要使用复杂的语言功能或库函数。
# Single pass over x: track the length and running sum of the current
# same-sign run and record (index, value, run length, run sum) per element.
result, prev = [], None
for idx, cur in enumerate(x):
    # Start a new run on the first element or whenever the sign flips.
    # `prev is None` (rather than `not prev`) keeps a 0.0 value from being
    # mistaken for "there is no previous element".
    if prev is None or (prev > 0) != (cur > 0):
        n, summation = 1, cur
    else:
        n, summation = n + 1, summation + cur
    result.append((idx, cur, n, summation))
    prev = cur
Run Code Online (Sandbox Code Playgroud)
如您所见,您并不真正需要问题部分代码段中的 sign_indicator 列表、两个 for 循环或 range 函数。
如果您希望索引从 1 开始,请使用enumerate(x, 1)而不是enumerate(x)
要查看结果,您可以运行以下代码
# Print one aligned row per element: index, value, run length, running sum.
for idx, num, length, summation in result:
    print(f"{idx:>2d} {num:.3f} {length:>2d} {summation:.3f}")
Run Code Online (Sandbox Code Playgroud)
Ron*_*hah 14
在 R 中,您可以使用 data.table 的 rleid 来创建由连续正数序列和连续负数序列构成的组,然后在每个组内生成行序号,并对 x 值求累积和。
library(data.table)
# Group consecutive same-sign values with rleid(), then add, by reference,
# the within-run row index and the within-run cumulative sum.
df <- data.table(x)
df[, `:=`(n_of_sequence = seq_len(.N), sum = cumsum(x)), by = rleid(sign(x))]
df
# x n_of_sequence sum
# 1: -0.010 1 -0.010
# 2: 0.003 1 0.003
# 3: -0.002 1 -0.002
# 4: 0.018 1 0.018
# 5: 0.002 2 0.020
# 6: 0.006 3 0.026
# 7: -0.012 1 -0.012
# 8: 0.014 1 0.014
# 9: -0.017 1 -0.017
#10: -0.007 2 -0.024
#11: 0.002 1 0.002
#12: 0.002 2 0.004
#13: -0.004 1 -0.004
#14: 0.015 1 0.015
#15: 0.002 2 0.017
#16: -0.001 1 -0.001
#17: -0.008 2 -0.009
#18: 0.010 1 0.010
#19: -0.018 1 -0.018
#20: 0.046 1 0.046
Run Code Online (Sandbox Code Playgroud)
我们也可以在 dplyr 中使用 rleid 来创建组并执行相同的操作。
library(dplyr)
# Same idea with dplyr: rleid() labels each same-sign run and the grouped
# mutate() adds the within-run index and cumulative sum. The grouping and the
# helper column are removed afterwards so the result is a plain, ungrouped
# table with only x, n_of_sequence and sum.
df %>%
  group_by(gr = data.table::rleid(sign(x))) %>%
  mutate(n_of_sequence = row_number(), sum = cumsum(x)) %>%
  ungroup() %>%
  select(-gr)
Run Code Online (Sandbox Code Playgroud)
Ame*_*eer 10
您可以使用 base 中的 rle 来计算每个符号的游程长度(run length),并执行类似的操作。
# Use the same seed as the question (100) so that `z` reproduces the
# question's data and the outputs printed for this answer.
set.seed(100)
z <- round(rnorm(20, sd = 0.02), 3)
# Lengths of the maximal runs of consecutive same-sign values.
run_lengths <- rle(sign(z))$lengths
run_lengths
# [1] 1 1 1 3 1 1 2 2 1 2 2 1 1 1
Run Code Online (Sandbox Code Playgroud)
要得到 n_of_sequence
# sequence() expands each run length k into 1, 2, ..., k and concatenates the
# pieces, giving the position of every element inside its run. This is base R,
# so no pipe or purrr::map() is required.
n_of_sequence <- sequence(run_lengths)
n_of_sequence
# [1] 1 1 1 1 2 3 1 1 1 2 1 2 1 1 2 1 2 1 1 1
Run Code Online (Sandbox Code Playgroud)
最后,为了得到序列的总和,
# Label every element with the index of the run it belongs to, then take the
# cumulative sum separately inside each run. ave() applies FUN per group and
# returns the results in the original element order (base R only).
run_id <- rep(seq_along(run_lengths), run_lengths)
ave(z, run_id, FUN = cumsum)
# [1] -0.010 0.003 -0.002 0.018 0.020 0.026 -0.012 0.014 -0.017 -0.024
# [11] 0.002 0.004 -0.004 0.015 0.017 -0.001 -0.009 0.010 -0.018 0.046
Run Code Online (Sandbox Code Playgroud)
对于这类操作,我推荐使用 R 包 runner。
streak_run 计算相同值连续出现的次数,sum_run 计算窗口内的总和,窗口长度由 k 参数定义。
这是解决方案:
# Recreate the question's data.
set.seed(100)
x <- round(rnorm(20, sd = 0.02), 3)
# Number of consecutive occurrences of the same value of (x > 0) so far.
n_of_sequence <- runner::streak_run(x > 0)
# Windowed sum whose window length at each position is given by k.
# NOTE(review): this creates a variable named `sum`, the same name as
# base::sum; the name is kept because it becomes the output column name.
sum <- runner::sum_run(x, k = n_of_sequence)
data.frame(x, n_of_sequence, sum)
# x n_of_sequence sum
# 1 -0.010 1 -0.010
# 2 0.003 1 0.003
# 3 -0.002 1 -0.002
# 4 0.018 1 0.018
# 5 0.002 2 0.020
# 6 0.006 3 0.026
# 7 -0.012 1 -0.012
# 8 0.014 1 0.014
# 9 -0.017 1 -0.017
# 10 -0.007 2 -0.024
# 11 0.002 1 0.002
# 12 0.002 2 0.004
# 13 -0.004 1 -0.004
# 14 0.015 1 0.015
# 15 0.002 2 0.017
# 16 -0.001 1 -0.001
# 17 -0.008 2 -0.009
# 18 0.010 1 0.010
# 19 -0.018 1 -0.018
# 20 0.046 1 0.046
Run Code Online (Sandbox Code Playgroud)
下面是用于比较各个实际解决方案的基准测试
set.seed(0)
x <- round(rnorm(10000, sd = 0.02), 3)
library(runner)
# Benchmark candidate based on the runner package: streak length of (x > 0)
# followed by a windowed sum whose window length is that streak length.
# The sums are returned invisibly.
runner_streak <- function(x) {
  streak_length <- streak_run(x > 0)
  invisible(sum_run(x, k = streak_length))
}
library(data.table)
dt <- data.table(x)
# Benchmark candidate based on data.table: adds the within-run index and the
# within-run cumulative sum to `dt` by reference, one group per sign run.
dt_streak <- function(dt) {
  dt[, `:=`(n_of_sequence = seq_len(.N), sum = cumsum(x)), by = rleid(sign(x))]
}
# Benchmark candidate based on rle(), written in base R only.
#
# x: numeric vector.
# Returns (invisibly) a vector of the same length as `x` holding the
# cumulative sum of `x` restarted at the beginning of every same-sign run.
rle_streak <- function(x) {
  run_lengths <- rle(sign(x))$lengths
  # Position of each element inside its run. It is not part of the return
  # value but is computed so the benchmark times the same work as the others.
  n_of_sequence <- sequence(run_lengths)
  # Run index of each element; ave() then applies cumsum() within each run.
  run_id <- rep(seq_along(run_lengths), run_lengths)
  invisible(ave(x, run_id, FUN = cumsum))
}
library(tidyverse)
df <- tibble(x = x)
# Benchmark candidate based on dplyr.
#
# x: a data frame / tibble that has a numeric column named `x`.
# Returns (invisibly) the input with `n_of_sequence` and `sum` columns added.
#
# The pipeline starts from the argument rather than from a global `df`, so the
# function operates on whatever table it is given. `.data$x` makes it explicit
# that the column `x` is meant, not the function argument of the same name.
tv_streak <- function(x) {
  res <- x %>%
    # Run id: increments every time the sign differs from the previous row.
    mutate(seqno = cumsum(c(1, diff(sign(.data$x)) != 0))) %>%
    group_by(seqno) %>%
    mutate(
      n_of_sequence = row_number(),
      sum = cumsum(.data$x)
    ) %>%
    ungroup() %>%
    select(-seqno)
  invisible(res)
}
# Count and sum runs of same-sign values.
#
# x: numeric vector (values are classified by `x > 0`).
# Returns a data.frame with one row per element of `x` and the columns
#   x   - the original value,
#   n   - position of the value inside its run,
#   sum - cumulative sum of the run up to and including the value.
#
# The result is built from whole vectors instead of rbind()-ing one small
# data.frame per run, and seq_along() replaces 1:length(), so empty input
# yields a zero-row data.frame instead of an error.
count_and_sum <- function(x) {
  runs <- rle((x > 0) * 1)$lengths
  # Run index of every element, e.g. lengths 1, 3 -> 1, 2, 2, 2.
  run_id <- rep(seq_along(runs), runs)
  data.frame(
    x = x,
    n = sequence(runs),
    sum = ave(x, run_id, FUN = cumsum)
  )
}
Run Code Online (Sandbox Code Playgroud)
microbenchmark::microbenchmark(
runner_streak(x),
dt_streak(dt),
rle_streak(x),
tv_streak(df),
count_and_sum(x),
times = 100L
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# runner_streak(x) 4.240192 4.833563 6.321697 5.300817 6.543926 14.80221 100
# dt_streak(dt) 7.648100 8.587887 10.862806 9.650483 11.295488 34.66027 100
# rle_streak(x) 42.321506 55.397586 64.195692 63.404403 67.813738 167.71444 100
# tv_streak(df) 31.398885 36.333751 45.141452 40.800077 45.756279 163.19535 100
# count_and_sum(x) 1691.438977 1919.518282 2306.036783 2149.543281 2499.951020 6158.43384 100
Run Code Online (Sandbox Code Playgroud)
这是 R 中的一个简单的非循环函数:
# Count and sum runs of same-sign values without an explicit loop.
#
# x: numeric vector (values are classified by `x > 0`).
# Returns a data.frame with one row per element of `x` and the columns
#   x   - the original value,
#   n   - position of the value inside its run,
#   sum - cumulative sum of the run up to and including the value.
#
# The result is built from whole vectors instead of rbind()-ing one small
# data.frame per run, and seq_along() replaces 1:length(), so empty input
# yields a zero-row data.frame instead of an error.
count_and_sum <- function(x) {
  runs <- rle((x > 0) * 1)$lengths
  # Run index of every element, e.g. lengths 1, 3 -> 1, 2, 2, 2.
  run_id <- rep(seq_along(runs), runs)
  data.frame(
    x = x,
    n = sequence(runs),
    sum = ave(x, run_id, FUN = cumsum)
  )
}
Run Code Online (Sandbox Code Playgroud)
所以你可以这样做:
set.seed(100)
x <- round(rnorm(20, sd = 0.02), 3)
count_and_sum(x)
#> x n sum
#> 1 -0.010 1 -0.010
#> 2 0.003 1 0.003
#> 3 -0.002 1 -0.002
#> 4 0.018 1 0.018
#> 5 0.002 2 0.020
#> 6 0.006 3 0.026
#> 7 -0.012 1 -0.012
#> 8 0.014 1 0.014
#> 9 -0.017 1 -0.017
#> 10 -0.007 2 -0.024
#> 11 0.002 1 0.002
#> 12 0.002 2 0.004
#> 13 -0.004 1 -0.004
#> 14 0.015 1 0.015
#> 15 0.002 2 0.017
#> 16 -0.001 1 -0.001
#> 17 -0.008 2 -0.009
#> 18 0.010 1 0.010
#> 19 -0.018 1 -0.018
#> 20 0.046 1 0.046
Run Code Online (Sandbox Code Playgroud)
由reprex 包(v0.3.0)于 2020 年 2 月 16 日创建
Python 中两种不同的惰性解决方案,使用itertools 模块。
from itertools import accumulate, groupby
# Lazily yield (position_in_run, running_sum) pairs: groupby() splits x into
# maximal runs of equal sign, accumulate() produces each run's partial sums,
# and enumerate(..., 1) numbers them starting from 1.
result = (
    pair
    for _, run in groupby(x, key=lambda value: value < 0)
    for pair in enumerate(accumulate(run), 1)
)
Run Code Online (Sandbox Code Playgroud)
from itertools import accumulate
def sign_count_sum(count_sum, value):
    """Fold one value into a running ``(count, sum)`` pair.

    Intended as the binary function for ``itertools.accumulate``.

    Args:
        count_sum: ``(count, prev_sum)`` for the run seen so far.
        value: the next number of the series.

    Returns:
        ``(count + 1, prev_sum + value)`` when ``value`` is on the same side
        of zero as ``prev_sum`` (both negative, or both non-negative),
        otherwise ``(1, value)`` to start a new run.
    """
    count, prev_sum = count_sum
    # Compare the two booleans by value; identity (`is`) is not needed here.
    same_sign = (prev_sum < 0) == (value < 0)
    if same_sign:
        return count + 1, prev_sum + value
    return 1, value
result = accumulate(x, sign_count_sum, initial=(0, 0))
next(result) # needed to skip the initial (0, 0) item
Run Code Online (Sandbox Code Playgroud)
initial 关键字参数是在 Python 3.8 中新增的。在早期版本中,您可以使用 itertools.chain 在序列前面添加一个 (0, 0) 元组:
# `chain` has to be imported as well; only `accumulate` was imported so far.
from itertools import accumulate, chain

# Prepending the (0, 0) seed emulates `initial=(0, 0)` on Python < 3.8.
# The seed is still the first item produced and has to be skipped.
result = accumulate(chain([(0, 0)], x), sign_count_sum)
Run Code Online (Sandbox Code Playgroud)
输出如预期:
# Print index, value, run length and running sum in aligned columns.
for (i, v), (c, s) in zip(enumerate(x), result):
    print(f"{i:3} {v:7.3f} {c:3} {s:7.3f}")
Run Code Online (Sandbox Code Playgroud)
0 -0.010 1 -0.010
1 0.003 1 0.003
2 -0.002 1 -0.002
3 0.018 1 0.018
4 0.002 2 0.020
5 0.006 3 0.026
6 -0.012 1 -0.012
7 0.014 1 0.014
8 -0.017 1 -0.017
9 -0.007 2 -0.024
10 0.002 1 0.002
11 0.002 2 0.004
12 -0.004 1 -0.004
13 0.015 1 0.015
14 0.002 2 0.017
15 -0.001 1 -0.001
16 -0.008 2 -0.009
17 0.010 1 0.010
18 -0.018 1 -0.018
19 0.046 1 0.046
Run Code Online (Sandbox Code Playgroud)
这是一个简单的tidyverse解决方案......
library(tidyverse) #or just dplyr and tidyr
set.seed(100)
x <- round(rnorm(20, sd = 0.02), 3)
# Build the result in one pipeline: label each same-sign run, then compute the
# within-run row index and the within-run cumulative sum.
df <- tibble(x = x) %>%
mutate(seqno = cumsum(c(1, diff(sign(x)) != 0))) %>% # run id: increments whenever the sign changes
group_by(seqno) %>% # one group per run
mutate(n_of_sequence = row_number(), # position of the row within its run
sum = cumsum(x)) %>% # cumulative sum within the run
ungroup() %>%
select(-seqno) # drop the helper run id
df
# A tibble: 20 x 3
x n_of_sequence sum
<dbl> <int> <dbl>
1 -0.01 1 -0.01
2 0.003 1 0.003
3 -0.002 1 -0.002
4 0.018 1 0.018
5 0.002 2 0.0200
6 0.006 3 0.026
7 -0.012 1 -0.012
8 0.014 1 0.014
9 -0.017 1 -0.017
10 -0.007 2 -0.024
11 0.002 1 0.002
12 0.002 2 0.004
13 -0.004 1 -0.004
14 0.015 1 0.015
15 0.002 2 0.017
16 -0.001 1 -0.001
17 -0.008 2 -0.009
18 0.01 1 0.01
19 -0.018 1 -0.018
20 0.046 1 0.046
Run Code Online (Sandbox Code Playgroud)
至于 Python,有人会想出一个使用 Pandas 库的解决方案。同时,这里有一个简单的建议:
class Combiner:
    """Stateful helper that tracks the current same-sign run of a series.

    Attributes:
        index: 1-based count of values fed in so far.
        seq_index: 1-based position of the latest value inside its run.
        summation: sum of the current run up to the latest value.
    """

    def __init__(self):
        self.index = self.seq_index = self.summation = 0

    def combine(self, value):
        """Feed the next value and return ``(index, value, seq_index, sum)``.

        A new run starts when ``value`` and the running sum have opposite
        signs or when either of them is zero (which covers the first call).
        """
        self.index += 1
        run_sum = self.summation
        # Compare signs directly instead of testing `value * run_sum <= 0`:
        # the product of two tiny same-sign floats can underflow to 0.0 and
        # would wrongly reset the run.
        starts_new_run = (
            value == 0 or run_sum == 0 or (value > 0) != (run_sum > 0)
        )
        if starts_new_run:
            self.seq_index = 1
            self.summation = value
        else:
            self.seq_index += 1
            self.summation += value
        return self.index, value, self.seq_index, self.summation
# Feed every value through one Combiner and print the resulting rows.
c = Combiner()
lst = [c.combine(v) for v in x]
for t in lst:
    print(f"{t[0]:3} {t[1]:7.3f} {t[2]:3} {t[3]:7.3f}")
Run Code Online (Sandbox Code Playgroud)
输出:
1 -0.010 1 -0.010
2 0.003 1 0.003
3 -0.002 1 -0.002
4 0.018 1 0.018
5 0.002 2 0.020
6 0.006 3 0.026
7 -0.012 1 -0.012
8 0.014 1 0.014
9 -0.017 1 -0.017
10 -0.007 2 -0.024
11 0.002 1 0.002
12 0.002 2 0.004
13 -0.004 1 -0.004
14 0.015 1 0.015
15 0.002 2 0.017
16 -0.001 1 -0.001
17 -0.008 2 -0.009
18 0.010 1 0.010
19 -0.018 1 -0.018
20 0.046 1 0.046
Run Code Online (Sandbox Code Playgroud)
如果你需要单独的列表,你可以做
idxs, vals, seqs, sums = (list(tpl) for tpl in zip(*lst))
Run Code Online (Sandbox Code Playgroud)
或者,如果迭代器没问题,只需
idxs, vals, seqs, sums = zip(*lst)
Run Code Online (Sandbox Code Playgroud)
(此处解释)
| 归档时间: |
|
| 查看次数: |
1557 次 |
| 最近记录: |