Mot*_*sim 8 java optimization performance
我有以下代码执行数组中位的循环移位:
private static void method1(byte[] bytes) {
byte previousByte = bytes[0];
bytes[0] = (byte) (((bytes[0] & 0xff) >> 1) | ((bytes[bytes.length - 1] & 0xff) << 7));
for (int i = 1; i < bytes.length; i++) {
byte tmp = bytes[i];
bytes[i] = (byte) (((bytes[i] & 0xff) >> 1) | ((previousByte & 0xff) << 7));
previousByte = tmp;
}
}
Run Code Online (Sandbox Code Playgroud)
然后我认为这样做更容易,更易读:
private static void method2(byte[] bytes) {
byte lastByte = bytes[bytes.length-1];
for (int i = bytes.length-1; i > 0; i--) {
bytes[i] = (byte) (((bytes[i] & 0xff) >> 1) | ((bytes[i-1] & 0xff) << 7));
}
bytes[0] = (byte) (((bytes[0] & 0xff) >> 1) | ((lastByte & 0xff) << 7));
}
Run Code Online (Sandbox Code Playgroud)
但我注意到第二个(method2)比第一个(method1)慢!我注意到了差异,因为我正在调用这种方法数千次.所以我做了一个测试以确保这里是20次测试每次调用3000次的平均结果(并且字节数是1百万):
method1 average : 4s 572ms
method2 average : 5s 630ms
Run Code Online (Sandbox Code Playgroud)
所以我的问题是:为什么第一个比第二个快?
这是测试代码,以确保我没有做我的测试错误:
import java.math.BigInteger;
public class BitShiftTests {
public static void main(String[] args) {
int numOfTests = 20;
int numberOfShifts = 3000;
byte[] numbers = new byte[1000000];
for (int i = 0; i < numbers.length; i++) {
numbers[i] = (byte) (i % 255);
}
System.out.println("Testing method1...");
BigInteger method1Sum = new BigInteger("00000000", 2);
for (int i = 1; i <= numOfTests; i++) {
long total = 0L;
for (int j = 0; j < numberOfShifts; j++) {
long startTime = System.nanoTime();
method1(numbers);
long endTime = System.nanoTime();
total = total + (endTime - startTime);
}
method1Sum = method1Sum.add(new BigInteger(Long.toString(total), 10));
System.out.println(String.format("%-2d: %s", i, getTime(total)));
}
System.out.println("Testing method2...");
BigInteger method2Sum = new BigInteger("00000000", 2);
for (int i = 1; i <= numOfTests; i++) {
long total = 0L;
for (int j = 0; j < numberOfShifts; j++) {
long startTime = System.nanoTime();
method2(numbers);
long endTime = System.nanoTime();
total = total + (endTime - startTime);
}
method2Sum = method2Sum.add(new BigInteger(Long.toString(total), 10));
System.out.println(String.format("%-2d: %s", i, getTime(total)));
}
System.out.println("method1 average : " + getTime(method1Sum.longValue() / numOfTests));
System.out.println("method2 average : " + getTime(method2Sum.longValue() / numOfTests));
}
private static void method1(byte[] bytes) {
byte previousByte = bytes[0];
bytes[0] = (byte) (((bytes[0] & 0xff) >> 1) | ((bytes[bytes.length - 1] & 0xff) << 7));
for (int i = 1; i < bytes.length; i++) {
byte tmp = bytes[i];
bytes[i] = (byte) (((bytes[i] & 0xff) >> 1) | ((previousByte & 0xff) << 7));
previousByte = tmp;
}
}
private static void method2(byte[] bytes) {
byte lastByte = bytes[bytes.length-1];
for (int i = bytes.length-1; i > 0; i--) {
bytes[i] = (byte) (((bytes[i] & 0xff) >> 1) | ((bytes[i-1] & 0xff) << 7));
}
bytes[0] = (byte) (((bytes[0] & 0xff) >> 1) | ((lastByte & 0xff) << 7));
}
private static String getTime(long nanoSecs) {
int minutes = (int) (nanoSecs / 60000000000.0);
int seconds = (int) (nanoSecs / 1000000000.0) - (minutes * 60);
int millisecs = (int) (((nanoSecs / 1000000000.0) - (seconds + minutes * 60)) * 1000);
int nanosecs = (int) nanoSecs - (millisecs * 1000000000);
if (minutes == 0 && seconds == 0 && millisecs == 0) {
return nanosecs + "ns";
}
if (minutes == 0 && seconds == 0) {
return millisecs + "ms";
}
if (minutes == 0 && millisecs == 0) {
return seconds + "s";
}
if (seconds == 0 && millisecs == 0) {
return minutes + "min";
}
if (minutes == 0) {
return seconds + "s " + millisecs + "ms";
}
if (seconds == 0) {
return minutes + "min " + millisecs + "ms";
}
if (millisecs == 0) {
return minutes + "min " + seconds + "s";
}
return minutes + "min " + seconds + "s " + millisecs + "ms";
}
}
Run Code Online (Sandbox Code Playgroud)
更新:
看起来原因是我在第二种方法中每个循环中访问2个不同的索引,而我在第一种方法中只访问了1个索引.所以它与反转循环无关.
谢谢@ rm5248和@Ben,如果可以的话,我会选择你的两个答案,但我选择了之前的答案.
我对此进行了快速测试,似乎第二种方法变慢的原因是因为算法改变了一点点.在第一个中,您将一个值保留在局部变量中,而您不在第二个值中.因此,Java必须两次转到数组才能获得变量.理论上,这应该没有任何不同,但我认为这与数组如何在Java中实现有关(我怀疑如果你在C中尝试它,时间会更接近).
作为参考,这是我的实现(我认为它做同样的事情,但它可能没有):
private static void method2(byte[] bytes) {
byte prevByte = bytes[bytes.length-1];
for (int i = bytes.length-1; i > 0; i--) {
byte tmp = bytes[i];
bytes[i] = (byte) (((bytes[i] & 0xff) >> 1) | ((prevByte & 0xff) << 7));
prevByte = tmp;
}
bytes[0] = (byte) (((bytes[0] & 0xff) >> 1) | ((bytes[bytes.length-1] & 0xff) << 7));
}
Run Code Online (Sandbox Code Playgroud)
这是我得到的平均时间:
method1 average : 6s 555ms
method2 average : 6s 726ms
Run Code Online (Sandbox Code Playgroud)