md5哈希计算函数失败

Car*_*oft 5 md5 r endianness md5sum

我正在尝试编写一个 md5 哈希函数而不R调用任何 C 例程。虽然我的代码执行得很好,但输出永远不会匹配tools::md5sum(它确实与各种在线文档中提供的示例匹配)。我怀疑某个地方存在字节顺序(或字顺序)问题;正如下面提供的代码所示,我尝试插入一些触发器,但没有成功。\n我检查了输出中是否存在简单的不匹配,例如正确的字节,但顺序错误,但没有成功。

\n

要运行此函数,您需要库Rmpfr以及bigBits此处提供的函数(flip32flip16bigRotate)。

\n
# need these to run:\nlibrary(Rmpfr)\nlibrary(bigBits)\n\nmymd5 <- function(msg){\n\n  if(!is(msg,'raw')) msg <- charToRaw(as.character(msg))\n# a const to be used when truncating values > ffffffff\ntwo32 <- as.bigz(2^(as.bigz(32))) \n\nsidx <- c( 7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22 , 5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20 , 4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23 , 6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21 )\n\n#  K-values verified to be correct.\n K <- as.bigz(2^32 * abs(sin(1:64))) \n\n# IETF  shows these values as-is, i.e. don't change Endian on these calculations\n#ietf.org page says A is 01234567, ie lower bytes first.  \n\nadef <- '67452301'\nbdef <- 'efcdab89'\ncdef <- '98badcfe'\nddef <- '10325476'\n\n# then optionally\nadef <- flip32(adef)\nbdef <- flip32(bdef)\ncdef <- flip32(cdef)\nddef <- flip32(ddef)\n\na0 <- (base2base(adef,16,10))[[1]]\nb0 <-(base2base(bdef,16,10))[[1]]\nc0 <-(base2base(cdef,16,10))[[1]]\nd0 <-(base2base(ddef,16,10))[[1]]\n\n#  append 0x80  and pad with 0x00 bytes so that the message length in bytes \xe2\x89\xa1 56 (mod 64).\n  origlenbit <- length(msg) * 8\n  msg <- c(msg, as.raw('0x80') )\n  bytemod <- length(msg)%%64\n  raw00 <- as.raw('0x00')\n#  append  '0x00' 56-bytemod times\n    if (bytemod <  56 && bytemod > 0 ) {\n        msg <- c(msg,rep(raw00,times=(56-bytemod)))\n    } else if (bytemod > 56) {\n        msg <- c(msg, rep(raw00, times = bytemod-56))\n    }\n    \n # https://datatracker.ietf.org/doc/html/rfc1321#section-3.4 says\n #  A 64-bit representation of b (the length of the message before the\n   # padding bits were added) is appended to the result of the previous\n   # step. In the unlikely event that b is greater than 2^64, then only\n   # the low-order 64 bits of b are used. (These bits are appended as two\n   # 32-bit words and appended low-order word first in accordance with the\n   # previous conventions.)\n    len16 <-    base2base(origlenbit,10,16)[[1]]\n    lenbit2 <- base2base(origlenbit,10,2)[[1]]\n    if(nchar(lenbit2) < 64 ){\n        lenbit2 <- paste0(c(rep('0',times = 64 -nchar(lenbit2)) ,lenbit2), collapse='')\n    } else if (nchar(lenbit2) > 64) lenbit2 <- rev(rev(lenbit2)[1:64])\n    newbytes2 <- substring(lenbit2, seq(1,nchar(lenbit2)-7,by=8), last = seq(8, nchar(lenbit2),by = 8) )    \n    newdec <- base2base(newbytes2,2,10)\n    newdbl <- rep(0,times= 8)\n    for (jd in 1: 8) newdbl[jd] <- as.numeric(newdec[[jd]])\n    newraw <- as.raw(newdbl)\n#  which order is correct? \n#   msg <- c(msg,newraw[c(7,8,5,6,3,4,1,2)])\n    msg <- c(msg, newraw[5:8], newraw[1:4])\n#   msg <- c(msg, newraw[1:4], newraw[5:8])\n    chunkit <- as.bigz(as.numeric(msg ) ) # all these will be <= 0xff\n##  turn groups of 512 BITS of msg into 16 words of 32 bits each.\n    for(jchunk in 1: (length(msg)/64)) {  \n        thischunk <- matrix(chunkit[ ((jchunk-1)*64)+(1:64)],ncol=16)\n#  should I reverse the bytes of this chunk? \n        thisM <- as.bigz(rep(0,16) )\n        for(jm in 1:16) thisM[jm] <- sum(thischunk[,jm] * c(256^3,256^2,256,1) )\n#       for(jm in 1:16) thisM[jm] <- sum(thischunk[,jm] * c(1,256,256^2,256^3) )\n      # Initialize hash value for this chunk:\n         Ah<- a0\n         Bh<- b0\n         Ch<- c0\n         Dh<- d0\n#now the chunk loop, which I make into 4 sep'te loops \n        for (jone in 1:16) {\n            Fh <- bigOr (bigAnd(Bh,Ch, inTwo=FALSE)[[1]] ,bigAnd(bigNot(Bh, inTwo=FALSE, outTwo=FALSE)[[1]],Dh, inTwo=FALSE)[[1]] )[[1]]\n            g <- jone\n            Fh<- (Fh + Ah + K[jone] + thisM[g]) %%two32  # truncate\n            Ah<- Dh\n            Dh<- Ch\n            Ch<- Bh\n            Bh<- (Bh + bigRotate(Fh, sidx[jone], inTwo=FALSE)[[1]] )%%two32   \n        }\n\n        for (j2 in 17:32) {\n            Fh <- bigOr( bigAnd(Dh,Bh,inTwo=FALSE)[[1]] ,bigAnd(bigNot(Dh[[1]],inTwo=FALSE, outTwo=FALSE), Ch, inTwo=FALSE)[[1]] ,inTwo=FALSE)[[1]] \n            g <-(1 + 5*(j2-1))%%16 +1 # plus one due to indexing from 1\n            Fh<- (Fh + Ah + K[j2] + thisM[g])%%two32  \n            Ah<- Dh\n            Dh<- Ch\n            Ch<- Bh\n            Bh<- (Bh + bigRotate(Fh, sidx[j2], inTwo=FALSE)[[1]])%%two32   \n        }\n        for(j3 in 33:48){\n            Fh <- bigXor(Bh,bigXor(Ch,Dh, inTwo=FALSE)[[1]], inTwo=FALSE)[[1]]\n            g <-(5 + 3*(j3-1))%%16 +1\n            Fh<- (Fh + Ah + K[j3] + thisM[g])%%two32  \n            Ah<- Dh\n            Dh<- Ch\n            Ch<- Bh\n            Bh<- (Bh + bigRotate(Fh, sidx[j3],inTwo=FALSE)[[1]])%%two32 \n        }\n        for(j4 in 49:64){\n            Fh <- bigXor(Ch,bigOr(Bh,bigNot(Dh,inTwo=FALSE, outTwo=FALSE)[[1]],inTwo=FALSE)[[1]],inTwo=FALSE)[[1]]\n            g <- (7 * (j4 -1))%%16 +1\n            Fh<- (Fh + Ah + K[j4] + thisM[g])%%two32  \n            Ah<- Dh\n            Dh<- Ch\n            Ch<- Bh\n            Bh<- (Bh + bigRotate(Fh, sidx[j4], inTwo=FALSE)[[1]])%%two32 \n        }\n           # Add this chunk's hash to result so far:\n        a0<- (a0 + Ah)%%two32\n        b0<- (b0 + Bh)%%two32\n        c0<- (c0 + Ch)%%two32\n        d0<- (d0 + Dh)%%two32\n    }   # end of jchunk\n\n#finally, string the values together.\n# watch for those leading zeros\na0x <-(base2base(a0,10,16))\na0x <- paste0(c(rep(0,times=max(8-(nchar(a0x)),0)), a0x), collapse='')\nb0x <-(base2base(b0,10,16))\nb0x <- paste0(c(rep(0,times=max(8-(nchar(b0x)),0)), b0x), collapse='')\nc0x <-(base2base(c0,10,16))\nc0x <- paste0(c(rep(0,times=max(8-(nchar(c0x)),0)), c0x), collapse='')\nd0x <-(base2base(d0,10,16))\nd0x <- paste0(c(rep(0,times=max(8-(nchar(d0x)),0)), d0x), collapse='')\n\n\nthesum <- paste0(c(a0x,b0x,c0x,d0x) , sep='',collapse='')\nreturn(thesum)\n} \n\n# helper func to test reordering bytes. e.g.  01234567 from 67452301\n\nflip32 <- function(x){\n    xsep <- unlist(strsplit(x,'') )\n    xout <- paste0(c(xsep[c(7,8,5,6,3,4,1,2)]),collapse='')\n    return(xout)\n}\nflip16 <- function(x) {\n        xsep <- unlist(strsplit(x,''))\n    xout <- paste0(c(xsep[c(3,4,1,2)]),collapse='')\n    return(xout)\n}\n\n\nbigRotate <- function(x, shift,  inBase = 10,binSize = 32, outBase = 10, inTwosComp = TRUE) {\n#default binary size is 32 to match bitwShiftR  \n shift = floor(shift[1])\n\n out <- rep('0',times = length(x))  # gmp::as.bigz(rep(0,times=length(x)))\n    for(jj in 1:length(x)) {\n        bintmp <- base2base(x[[jj]],inBase,2, binSize = binSize, inTwosComp = inTwosComp, outTwosComp = FALSE)[[1]]\n#now all inputs will have a neg sign if negative.\n        isPos = TRUE\n        if(length(grep('-',bintmp)) ) {\n            isPos = FALSE\n            bintmp <- gsub('^-','',bintmp)\n        }\n        bintmp <- strsplit(bintmp,'')[[1]]\n        xlen = length(bintmp)\n    # now rotate the bits.  \n        otmp  <- unlist(paste0(c(bintmp[ ((1:xlen) + shift -1) %%(xlen) + 1]),collapse=''))\n        out[jj] <- base2base(otmp, 2, outBase, binSize=binSize, outTwosComp = FALSE, classOut = "character")[[1]]\n        if(!isPos) out[jj] <- paste0('-',out[jj], collapse='')\n    }\n    # provide output in source 'inBase'\n    if(is(x,'mpfr') || is(x,'mpfr1')) {\n        out <- Rmpfr::.bigz2mpfr(gmp::as.bigz(out))\n    } else if(is(x,'numeric')) {\n        out <- as.numeric(out)\n    } else if(is(x,'bigz')) out <- gmp::as.bigz(out)\n    \n return(out) \n}\n
Run Code Online (Sandbox Code Playgroud)\n

Pau*_*aul 5

下面列出了一些问题。

1. 删除flip32

2. 附加填充位的问题

它被重写为

msg <- c(msg, as.raw('0x80') )
while (length(msg) %% 64 != 56) {
  msg <- c(msg, as.raw(0))
}
Run Code Online (Sandbox Code Playgroud)

3. 矩阵的行thischunk需要颠倒。

thischunk <- thischunk[nrow(thischunk):1,]
Run Code Online (Sandbox Code Playgroud)

4. 问题bigRotate

这可以表示为((x<<amount) | (x>>(32-amount))) & 0xFFFFFFFF因此它被重写为

bigRotate <- function(x, amount) {
  two32 <- as.bigz(2^(as.bigz(32)))
  x <- x %% two32
  lshift <- bigShiftL(x, amount)
  rshift <- bigShiftR(x, 32-amount)
  
  # bigOr errors if either value is zero so handle those cases explicitly
  if (lshift == 0) {
    rshift
  } else if (rshift == 0) {
    lshift
  } else {
    bigOr(lshift, rshift) %% two32
  }
}
Run Code Online (Sandbox Code Playgroud)

5. 重写了abc、 的组合d并交换了字节序。

swap_endianness <- function(hex) {
  chars <- strsplit(hex, "")[[1]]
  pairs <- paste0(chars[c(TRUE, FALSE)], chars[c(FALSE, TRUE)])
  paste0(rev(pairs), collapse = "")
}

thesum <- sum(
    bigShiftL(a0, 32 * 0),
    bigShiftL(b0, 32 * 1),
    bigShiftL(c0, 32 * 2),
    bigShiftL(d0, 32 * 3)
)
hex <- base2base(thesum, frombase = 10, tobase = 16)[[1]]
output <- swap_endianness(hex)
Run Code Online (Sandbox Code Playgroud)

更正代码

实施上述更改会产生

library(bigBits)
library(gmp)


mymd5 <- function(msg){
  if(!is(msg,'raw')) msg <- charToRaw(as.character(msg))
  
  # a const to be used when truncating values > ffffffff
  two32 <- as.bigz(2^(as.bigz(32)))
  sidx <- c(7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22 , 5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20 , 4, 11, 16, 23,  4, 11, 16, 23, 4, 11, 16, 23,  4, 11, 16, 23 , 6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21)
  K <- as.bigz(2^32 * abs(sin(1:64))) 
  
  adef <- '67452301'
  bdef <- 'efcdab89'
  cdef <- '98badcfe'
  ddef <- '10325476'
  
  a0 <- base2base(adef, 16, 10)[[1]]
  b0 <- base2base(bdef, 16, 10)[[1]]
  c0 <- base2base(cdef, 16, 10)[[1]]
  d0 <- base2base(ddef, 16, 10)[[1]]

  origlenbit <- length(msg) * 8
  
  msg <- c(msg, as.raw('0x80') )
  while (length(msg) %% 64 != 56) {
    msg <- c(msg, as.raw(0))
  }

  length_bytes <- writeBin(as.integer(origlenbit), raw(), size = 8, endian = "little")
  msg <- c(msg, length_bytes, rep(0, length.out = 8 - length(length_bytes)))
  
  chunkit <- as.bigz(as.numeric(msg ) ) # all these will be <= 0xff
  
  ##  turn groups of 512 BITS of msg into 16 words of 32 bits each.
  for(jchunk in 1: (length(msg)/64)) {  
    thischunk <- matrix(chunkit[ ((jchunk-1)*64)+(1:64)],ncol=16)
    thischunk <- thischunk[nrow(thischunk):1,]
    thisM <- as.bigz(rep(0,16) )
    for(jm in 1:16) thisM[jm] <- sum(thischunk[,jm] * c(256^3,256^2,256,1))

    # Initialize hash value for this chunk:
    Ah <- a0
    Bh <- b0
    Ch <- c0
    Dh <- d0
    
    #now the chunk loop, which I make into 4 sep'te loops 
    for (jone in 1:16) {
      Fh <- bigOr (bigAnd(Bh,Ch, inTwo=FALSE)[[1]] ,bigAnd(bigNot(Bh, inTwo=FALSE, outTwo=FALSE)[[1]],Dh, inTwo=FALSE)[[1]] )[[1]]
      g <- jone
      Fh <- (Fh + Ah + K[jone] + thisM[g]) %% two32
      Ah <- Dh
      Dh <- Ch
      Ch <- Bh
      Bh <- (Bh + bigRotate(Fh, sidx[jone])) %% two32   
    }
    
    for (j2 in 17:32) {
      Fh <- bigOr( bigAnd(Dh,Bh,inTwo=FALSE)[[1]] ,bigAnd(bigNot(Dh[[1]],inTwo=FALSE, outTwo=FALSE), Ch, inTwo=FALSE)[[1]] ,inTwo=FALSE)[[1]] 
      g <-(1 + 5*(j2-1))%%16 +1 # plus one due to indexing from 1
      Fh <- (Fh + Ah + K[j2] + thisM[g]) %% two32  
      Ah <- Dh
      Dh <- Ch
      Ch <- Bh
      Bh <- (Bh + bigRotate(Fh, sidx[j2])) %% two32
    }
    for(j3 in 33:48){
      Fh <- bigXor(Bh,bigXor(Ch,Dh, inTwo=FALSE)[[1]], inTwo=FALSE)[[1]]
      g <-(5 + 3*(j3-1))%%16 +1
      Fh<- (Fh + Ah + K[j3] + thisM[g]) %% two32  
      Ah<- Dh
      Dh<- Ch
      Ch<- Bh
      Bh<- (Bh + bigRotate(Fh, sidx[j3]))%%two32 
    }
    for(j4 in 49:64){
      Fh <- bigXor(Ch,bigOr(Bh,bigNot(Dh,inTwo=FALSE, outTwo=FALSE)[[1]],inTwo=FALSE)[[1]],inTwo=FALSE)[[1]]
      g <- (7 * (j4 -1))%%16 +1
      Fh<- (Fh + Ah + K[j4] + thisM[g]) %% two32  
      Ah<- Dh
      Dh<- Ch
      Ch<- Bh
      Bh<- (Bh + bigRotate(Fh, sidx[j4])) %% two32 
    }
    
    # Add this chunk's hash to result so far:
    a0 <- (a0 + Ah) %% two32
    b0 <- (b0 + Bh) %% two32
    c0 <- (c0 + Ch) %% two32
    d0 <- (d0 + Dh) %% two32
  }   # end of jchunk

  thesum <- sum(
    bigShiftL(a0, 32 * 0),
    bigShiftL(b0, 32 * 1),
    bigShiftL(c0, 32 * 2),
    bigShiftL(d0, 32 * 3)
  )
  
  hex <- base2base(thesum, frombase = 10, tobase = 16)[[1]]
  swap_endianness(hex)
}


# ((x<<amount) | (x>>(32-amount))) & 0xFFFFFFFF
bigRotate <- function(x, amount) {
  two32 <- as.bigz(2^(as.bigz(32)))
  x <- x %% two32
  lshift <- bigShiftL(x, amount)
  rshift <- bigShiftR(x, 32-amount)
  
  # bigOr errors if either value is zero so handle those cases explicitly
  if (lshift == 0) {
    rshift
  } else if (rshift == 0) {
    lshift
  } else {
    bigOr(lshift, rshift) %% two32
  }
}


swap_endianness <- function(hex) {
  chars <- strsplit(hex, "")[[1]]
  pairs <- paste0(chars[c(TRUE, FALSE)], chars[c(FALSE, TRUE)])
  paste0(rev(pairs), collapse = "")
}
Run Code Online (Sandbox Code Playgroud)

通过了一些单元测试

library(testthat)

test_that("md5 works", {
  expect_equal(mymd5(""), "d41d8cd98f00b204e9800998ecf8427e")
  expect_equal(mymd5("a"), "0cc175b9c0f1b6a831c399e269772661")
  expect_equal(mymd5("abc"), "900150983cd24fb0d6963f7d28e17f72")
  expect_equal(mymd5("message digest"), "f96b697d7cb7938d525a2f31aaf161d0")
  expect_equal(mymd5("abcdefghijklmnopqrstuvwxyz"), "c3fcd3d76192e4007dfb496cca67e13b")
  expect_equal(mymd5("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), "d174ab98d277d9f5a5611c2c9f419d9f")
  expect_equal(mymd5("12345678901234567890123456789012345678901234567890123456789012345678901234567890"), "57edf4a22be3c955ac49da2e2107b67a")
})
#> Test passed
Run Code Online (Sandbox Code Playgroud)