Car*_*oft 5 md5 r endianness md5sum
我正在尝试编写一个 md5 哈希函数而不R调用任何 C 例程。虽然我的代码执行得很好,但输出永远不会匹配tools::md5sum(它确实与各种在线文档中提供的示例匹配)。我怀疑某个地方存在字节顺序(或字顺序)问题;正如下面提供的代码所示,我尝试插入一些触发器,但没有成功。\n我检查了输出中是否存在简单的不匹配,例如正确的字节,但顺序错误,但没有成功。
要运行此函数,您需要库Rmpfr以及bigBits此处提供的函数(flip32、flip16和bigRotate)。
# need these to run:\nlibrary(Rmpfr)\nlibrary(bigBits)\n\nmymd5 <- function(msg){\n\n if(!is(msg,'raw')) msg <- charToRaw(as.character(msg))\n# a const to be used when truncating values > ffffffff\ntwo32 <- as.bigz(2^(as.bigz(32))) \n\nsidx <- c( 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22 , 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20 , 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23 , 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21 )\n\n# K-values verified to be correct.\n K <- as.bigz(2^32 * abs(sin(1:64))) \n\n# IETF shows these values as-is, i.e. don't change Endian on these calculations\n#ietf.org page says A is 01234567, ie lower bytes first. \n\nadef <- '67452301'\nbdef <- 'efcdab89'\ncdef <- '98badcfe'\nddef <- '10325476'\n\n# then optionally\nadef <- flip32(adef)\nbdef <- flip32(bdef)\ncdef <- flip32(cdef)\nddef <- flip32(ddef)\n\na0 <- (base2base(adef,16,10))[[1]]\nb0 <-(base2base(bdef,16,10))[[1]]\nc0 <-(base2base(cdef,16,10))[[1]]\nd0 <-(base2base(ddef,16,10))[[1]]\n\n# append 0x80 and pad with 0x00 bytes so that the message length in bytes \xe2\x89\xa1 56 (mod 64).\n origlenbit <- length(msg) * 8\n msg <- c(msg, as.raw('0x80') )\n bytemod <- length(msg)%%64\n raw00 <- as.raw('0x00')\n# append '0x00' 56-bytemod times\n if (bytemod < 56 && bytemod > 0 ) {\n msg <- c(msg,rep(raw00,times=(56-bytemod)))\n } else if (bytemod > 56) {\n msg <- c(msg, rep(raw00, times = bytemod-56))\n }\n \n # https://datatracker.ietf.org/doc/html/rfc1321#section-3.4 says\n # A 64-bit representation of b (the length of the message before the\n # padding bits were added) is appended to the result of the previous\n # step. In the unlikely event that b is greater than 2^64, then only\n # the low-order 64 bits of b are used. (These bits are appended as two\n # 32-bit words and appended low-order word first in accordance with the\n # previous conventions.)\n len16 <- base2base(origlenbit,10,16)[[1]]\n lenbit2 <- base2base(origlenbit,10,2)[[1]]\n if(nchar(lenbit2) < 64 ){\n lenbit2 <- paste0(c(rep('0',times = 64 -nchar(lenbit2)) ,lenbit2), collapse='')\n } else if (nchar(lenbit2) > 64) lenbit2 <- rev(rev(lenbit2)[1:64])\n newbytes2 <- substring(lenbit2, seq(1,nchar(lenbit2)-7,by=8), last = seq(8, nchar(lenbit2),by = 8) ) \n newdec <- base2base(newbytes2,2,10)\n newdbl <- rep(0,times= 8)\n for (jd in 1: 8) newdbl[jd] <- as.numeric(newdec[[jd]])\n newraw <- as.raw(newdbl)\n# which order is correct? \n# msg <- c(msg,newraw[c(7,8,5,6,3,4,1,2)])\n msg <- c(msg, newraw[5:8], newraw[1:4])\n# msg <- c(msg, newraw[1:4], newraw[5:8])\n chunkit <- as.bigz(as.numeric(msg ) ) # all these will be <= 0xff\n## turn groups of 512 BITS of msg into 16 words of 32 bits each.\n for(jchunk in 1: (length(msg)/64)) { \n thischunk <- matrix(chunkit[ ((jchunk-1)*64)+(1:64)],ncol=16)\n# should I reverse the bytes of this chunk? \n thisM <- as.bigz(rep(0,16) )\n for(jm in 1:16) thisM[jm] <- sum(thischunk[,jm] * c(256^3,256^2,256,1) )\n# for(jm in 1:16) thisM[jm] <- sum(thischunk[,jm] * c(1,256,256^2,256^3) )\n # Initialize hash value for this chunk:\n Ah<- a0\n Bh<- b0\n Ch<- c0\n Dh<- d0\n#now the chunk loop, which I make into 4 sep'te loops \n for (jone in 1:16) {\n Fh <- bigOr (bigAnd(Bh,Ch, inTwo=FALSE)[[1]] ,bigAnd(bigNot(Bh, inTwo=FALSE, outTwo=FALSE)[[1]],Dh, inTwo=FALSE)[[1]] )[[1]]\n g <- jone\n Fh<- (Fh + Ah + K[jone] + thisM[g]) %%two32 # truncate\n Ah<- Dh\n Dh<- Ch\n Ch<- Bh\n Bh<- (Bh + bigRotate(Fh, sidx[jone], inTwo=FALSE)[[1]] )%%two32 \n }\n\n for (j2 in 17:32) {\n Fh <- bigOr( bigAnd(Dh,Bh,inTwo=FALSE)[[1]] ,bigAnd(bigNot(Dh[[1]],inTwo=FALSE, outTwo=FALSE), Ch, inTwo=FALSE)[[1]] ,inTwo=FALSE)[[1]] \n g <-(1 + 5*(j2-1))%%16 +1 # plus one due to indexing from 1\n Fh<- (Fh + Ah + K[j2] + thisM[g])%%two32 \n Ah<- Dh\n Dh<- Ch\n Ch<- Bh\n Bh<- (Bh + bigRotate(Fh, sidx[j2], inTwo=FALSE)[[1]])%%two32 \n }\n for(j3 in 33:48){\n Fh <- bigXor(Bh,bigXor(Ch,Dh, inTwo=FALSE)[[1]], inTwo=FALSE)[[1]]\n g <-(5 + 3*(j3-1))%%16 +1\n Fh<- (Fh + Ah + K[j3] + thisM[g])%%two32 \n Ah<- Dh\n Dh<- Ch\n Ch<- Bh\n Bh<- (Bh + bigRotate(Fh, sidx[j3],inTwo=FALSE)[[1]])%%two32 \n }\n for(j4 in 49:64){\n Fh <- bigXor(Ch,bigOr(Bh,bigNot(Dh,inTwo=FALSE, outTwo=FALSE)[[1]],inTwo=FALSE)[[1]],inTwo=FALSE)[[1]]\n g <- (7 * (j4 -1))%%16 +1\n Fh<- (Fh + Ah + K[j4] + thisM[g])%%two32 \n Ah<- Dh\n Dh<- Ch\n Ch<- Bh\n Bh<- (Bh + bigRotate(Fh, sidx[j4], inTwo=FALSE)[[1]])%%two32 \n }\n # Add this chunk's hash to result so far:\n a0<- (a0 + Ah)%%two32\n b0<- (b0 + Bh)%%two32\n c0<- (c0 + Ch)%%two32\n d0<- (d0 + Dh)%%two32\n } # end of jchunk\n\n#finally, string the values together.\n# watch for those leading zeros\na0x <-(base2base(a0,10,16))\na0x <- paste0(c(rep(0,times=max(8-(nchar(a0x)),0)), a0x), collapse='')\nb0x <-(base2base(b0,10,16))\nb0x <- paste0(c(rep(0,times=max(8-(nchar(b0x)),0)), b0x), collapse='')\nc0x <-(base2base(c0,10,16))\nc0x <- paste0(c(rep(0,times=max(8-(nchar(c0x)),0)), c0x), collapse='')\nd0x <-(base2base(d0,10,16))\nd0x <- paste0(c(rep(0,times=max(8-(nchar(d0x)),0)), d0x), collapse='')\n\n\nthesum <- paste0(c(a0x,b0x,c0x,d0x) , sep='',collapse='')\nreturn(thesum)\n} \n\n# helper func to test reordering bytes. e.g. 01234567 from 67452301\n\nflip32 <- function(x){\n xsep <- unlist(strsplit(x,'') )\n xout <- paste0(c(xsep[c(7,8,5,6,3,4,1,2)]),collapse='')\n return(xout)\n}\nflip16 <- function(x) {\n xsep <- unlist(strsplit(x,''))\n xout <- paste0(c(xsep[c(3,4,1,2)]),collapse='')\n return(xout)\n}\n\n\nbigRotate <- function(x, shift, inBase = 10,binSize = 32, outBase = 10, inTwosComp = TRUE) {\n#default binary size is 32 to match bitwShiftR \n shift = floor(shift[1])\n\n out <- rep('0',times = length(x)) # gmp::as.bigz(rep(0,times=length(x)))\n for(jj in 1:length(x)) {\n bintmp <- base2base(x[[jj]],inBase,2, binSize = binSize, inTwosComp = inTwosComp, outTwosComp = FALSE)[[1]]\n#now all inputs will have a neg sign if negative.\n isPos = TRUE\n if(length(grep('-',bintmp)) ) {\n isPos = FALSE\n bintmp <- gsub('^-','',bintmp)\n }\n bintmp <- strsplit(bintmp,'')[[1]]\n xlen = length(bintmp)\n # now rotate the bits. \n otmp <- unlist(paste0(c(bintmp[ ((1:xlen) + shift -1) %%(xlen) + 1]),collapse=''))\n out[jj] <- base2base(otmp, 2, outBase, binSize=binSize, outTwosComp = FALSE, classOut = "character")[[1]]\n if(!isPos) out[jj] <- paste0('-',out[jj], collapse='')\n }\n # provide output in source 'inBase'\n if(is(x,'mpfr') || is(x,'mpfr1')) {\n out <- Rmpfr::.bigz2mpfr(gmp::as.bigz(out))\n } else if(is(x,'numeric')) {\n out <- as.numeric(out)\n } else if(is(x,'bigz')) out <- gmp::as.bigz(out)\n \n return(out) \n}\nRun Code Online (Sandbox Code Playgroud)\n
下面列出了一些问题。
flip32它被重写为
msg <- c(msg, as.raw('0x80') )
while (length(msg) %% 64 != 56) {
msg <- c(msg, as.raw(0))
}
Run Code Online (Sandbox Code Playgroud)
thischunk需要颠倒。thischunk <- thischunk[nrow(thischunk):1,]
Run Code Online (Sandbox Code Playgroud)
bigRotate这可以表示为((x<<amount) | (x>>(32-amount))) & 0xFFFFFFFF因此它被重写为
bigRotate <- function(x, amount) {
two32 <- as.bigz(2^(as.bigz(32)))
x <- x %% two32
lshift <- bigShiftL(x, amount)
rshift <- bigShiftR(x, 32-amount)
# bigOr errors if either value is zero so handle those cases explicitly
if (lshift == 0) {
rshift
} else if (rshift == 0) {
lshift
} else {
bigOr(lshift, rshift) %% two32
}
}
Run Code Online (Sandbox Code Playgroud)
a、b、c、 的组合d并交换了字节序。swap_endianness <- function(hex) {
chars <- strsplit(hex, "")[[1]]
pairs <- paste0(chars[c(TRUE, FALSE)], chars[c(FALSE, TRUE)])
paste0(rev(pairs), collapse = "")
}
thesum <- sum(
bigShiftL(a0, 32 * 0),
bigShiftL(b0, 32 * 1),
bigShiftL(c0, 32 * 2),
bigShiftL(d0, 32 * 3)
)
hex <- base2base(thesum, frombase = 10, tobase = 16)[[1]]
output <- swap_endianness(hex)
Run Code Online (Sandbox Code Playgroud)
实施上述更改会产生
library(bigBits)
library(gmp)
mymd5 <- function(msg){
if(!is(msg,'raw')) msg <- charToRaw(as.character(msg))
# a const to be used when truncating values > ffffffff
two32 <- as.bigz(2^(as.bigz(32)))
sidx <- c(7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22 , 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20 , 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23 , 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21)
K <- as.bigz(2^32 * abs(sin(1:64)))
adef <- '67452301'
bdef <- 'efcdab89'
cdef <- '98badcfe'
ddef <- '10325476'
a0 <- base2base(adef, 16, 10)[[1]]
b0 <- base2base(bdef, 16, 10)[[1]]
c0 <- base2base(cdef, 16, 10)[[1]]
d0 <- base2base(ddef, 16, 10)[[1]]
origlenbit <- length(msg) * 8
msg <- c(msg, as.raw('0x80') )
while (length(msg) %% 64 != 56) {
msg <- c(msg, as.raw(0))
}
length_bytes <- writeBin(as.integer(origlenbit), raw(), size = 8, endian = "little")
msg <- c(msg, length_bytes, rep(0, length.out = 8 - length(length_bytes)))
chunkit <- as.bigz(as.numeric(msg ) ) # all these will be <= 0xff
## turn groups of 512 BITS of msg into 16 words of 32 bits each.
for(jchunk in 1: (length(msg)/64)) {
thischunk <- matrix(chunkit[ ((jchunk-1)*64)+(1:64)],ncol=16)
thischunk <- thischunk[nrow(thischunk):1,]
thisM <- as.bigz(rep(0,16) )
for(jm in 1:16) thisM[jm] <- sum(thischunk[,jm] * c(256^3,256^2,256,1))
# Initialize hash value for this chunk:
Ah <- a0
Bh <- b0
Ch <- c0
Dh <- d0
#now the chunk loop, which I make into 4 sep'te loops
for (jone in 1:16) {
Fh <- bigOr (bigAnd(Bh,Ch, inTwo=FALSE)[[1]] ,bigAnd(bigNot(Bh, inTwo=FALSE, outTwo=FALSE)[[1]],Dh, inTwo=FALSE)[[1]] )[[1]]
g <- jone
Fh <- (Fh + Ah + K[jone] + thisM[g]) %% two32
Ah <- Dh
Dh <- Ch
Ch <- Bh
Bh <- (Bh + bigRotate(Fh, sidx[jone])) %% two32
}
for (j2 in 17:32) {
Fh <- bigOr( bigAnd(Dh,Bh,inTwo=FALSE)[[1]] ,bigAnd(bigNot(Dh[[1]],inTwo=FALSE, outTwo=FALSE), Ch, inTwo=FALSE)[[1]] ,inTwo=FALSE)[[1]]
g <-(1 + 5*(j2-1))%%16 +1 # plus one due to indexing from 1
Fh <- (Fh + Ah + K[j2] + thisM[g]) %% two32
Ah <- Dh
Dh <- Ch
Ch <- Bh
Bh <- (Bh + bigRotate(Fh, sidx[j2])) %% two32
}
for(j3 in 33:48){
Fh <- bigXor(Bh,bigXor(Ch,Dh, inTwo=FALSE)[[1]], inTwo=FALSE)[[1]]
g <-(5 + 3*(j3-1))%%16 +1
Fh<- (Fh + Ah + K[j3] + thisM[g]) %% two32
Ah<- Dh
Dh<- Ch
Ch<- Bh
Bh<- (Bh + bigRotate(Fh, sidx[j3]))%%two32
}
for(j4 in 49:64){
Fh <- bigXor(Ch,bigOr(Bh,bigNot(Dh,inTwo=FALSE, outTwo=FALSE)[[1]],inTwo=FALSE)[[1]],inTwo=FALSE)[[1]]
g <- (7 * (j4 -1))%%16 +1
Fh<- (Fh + Ah + K[j4] + thisM[g]) %% two32
Ah<- Dh
Dh<- Ch
Ch<- Bh
Bh<- (Bh + bigRotate(Fh, sidx[j4])) %% two32
}
# Add this chunk's hash to result so far:
a0 <- (a0 + Ah) %% two32
b0 <- (b0 + Bh) %% two32
c0 <- (c0 + Ch) %% two32
d0 <- (d0 + Dh) %% two32
} # end of jchunk
thesum <- sum(
bigShiftL(a0, 32 * 0),
bigShiftL(b0, 32 * 1),
bigShiftL(c0, 32 * 2),
bigShiftL(d0, 32 * 3)
)
hex <- base2base(thesum, frombase = 10, tobase = 16)[[1]]
swap_endianness(hex)
}
# ((x<<amount) | (x>>(32-amount))) & 0xFFFFFFFF
bigRotate <- function(x, amount) {
two32 <- as.bigz(2^(as.bigz(32)))
x <- x %% two32
lshift <- bigShiftL(x, amount)
rshift <- bigShiftR(x, 32-amount)
# bigOr errors if either value is zero so handle those cases explicitly
if (lshift == 0) {
rshift
} else if (rshift == 0) {
lshift
} else {
bigOr(lshift, rshift) %% two32
}
}
swap_endianness <- function(hex) {
chars <- strsplit(hex, "")[[1]]
pairs <- paste0(chars[c(TRUE, FALSE)], chars[c(FALSE, TRUE)])
paste0(rev(pairs), collapse = "")
}
Run Code Online (Sandbox Code Playgroud)
通过了一些单元测试
library(testthat)
test_that("md5 works", {
expect_equal(mymd5(""), "d41d8cd98f00b204e9800998ecf8427e")
expect_equal(mymd5("a"), "0cc175b9c0f1b6a831c399e269772661")
expect_equal(mymd5("abc"), "900150983cd24fb0d6963f7d28e17f72")
expect_equal(mymd5("message digest"), "f96b697d7cb7938d525a2f31aaf161d0")
expect_equal(mymd5("abcdefghijklmnopqrstuvwxyz"), "c3fcd3d76192e4007dfb496cca67e13b")
expect_equal(mymd5("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), "d174ab98d277d9f5a5611c2c9f419d9f")
expect_equal(mymd5("12345678901234567890123456789012345678901234567890123456789012345678901234567890"), "57edf4a22be3c955ac49da2e2107b67a")
})
#> Test passed
Run Code Online (Sandbox Code Playgroud)