jVi*_*ent 9 opengl debugging graphics performance
我遇到了一个非常奇怪的性能问题.到目前为止,我已经将问题简化为:我在网格中渲染20x20x20立方体,使用glDrawElementsInstanced,只要我的相机远离原点就可以正常工作,但是当它接近原点时,它会开始磨削停了下来.
我通过以下方式定义模型视图投影矩阵:
float distance=3.8;
Projection = glm::perspective(65.0f, (float)(width)/height, 0.1f, 300.0f);
View = glm::lookAt( glm::vec3(0,0,-distance),
glm::vec3(0,0,10),
glm::vec3(0,1,0));
Model = glm::rotate(glm::mat4(1.0f), 0.0f, glm::vec3(0.25f, 1.0f,0.75f));
Run Code Online (Sandbox Code Playgroud)
距离为40,没有问题,但当距离减少到大约3.8和更低时,一切都停止了.
实际的渲染调用是通过以下方式执行的:
glBindVertexArray(cubeVAO);
glDrawElementsInstanced(GL_TRIANGLES, indices.size(),GL_UNSIGNED_INT,(GLvoid*)(0),latticePoints.size());
Run Code Online (Sandbox Code Playgroud)
将所有顶点放在一个缓冲区中并通过调用渲染:
glBindVertexArray(nonInstancedVAO);
glDrawArrays(GL_TRIANGLES, 0,vertices.size() );
Run Code Online (Sandbox Code Playgroud)
完全删除行为.任何经历过类似行为的人都能指出我的解决方案吗?如果失败了,任何人都知道如何追踪这样的事情?我希望我能够使用gDEBugger确定导致速度减慢的原因,但是这只是重新确认没有任何其他opengl调用,并且无法确定是什么占用了所有处理时间.
另一个注意事项是glDrawArraysInstanced也显示相同的减速,并且将调用分成4个单独的调用,每个几何的四分之一也会停止减速.
更新
这是对问题的最小说明的尝试.
//Minimal reproduction of problem
#include <stdio.h>
#include <string>
#include <fstream>
#include <stdlib.h>
#include <string.h>
#include <GL/glew.h>
#include <GLFW/glfw3.h>
// Include GLM
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
#include <vector>
#include <iostream>
#include <stdio.h>
//Set to true to use instanced rendering (glDrawElementsInstanced), false to render a generated grid instead (glDrawElements)
#define Instanced true
//Translation from origin. Problme is pressent at 0 distance, but disapears at ex. 40.
const float distanceFromOrigin=0;
// Function to load shaders
GLuint LoadShaders(const char * vertex_file_path,const char * fragment_file_path);
int main(){
int width, height;
bool running = true;
// Initialise GLFW
glfwInit();
glfwWindowHint(GLFW_SAMPLES,1);
glfwWindowHint(GLFW_OPENGL_DEBUG_CONTEXT,GL_TRUE);
glfwWindowHint(GLFW_VERSION_MAJOR, 4);
GLFWwindow* windowRef = glfwCreateWindow( 512, 512, "",0,0);
glfwMakeContextCurrent(windowRef);
glewInit();
//Load Shader
GLuint programID = LoadShaders( "Simple.vs.c", "Simple.fs.c" );
GLuint MatrixID = glGetUniformLocation(programID, "MVP");
glUseProgram(programID);
glm::mat4 Model,Projection,MVP,View,checkMVP;
std::vector<GLuint> sqIndice = {3,2,1,1,0,3,4,5,6,6,7,4,0,4,7,7,3,0,0,1,5,5,4,0,2,3,7,7,6,2,6,5,1,1,2,6,0,4,7,7,3,0};
std::vector<GLfloat> sqVertex = {-1, 1, -1, -1, 1, 1, -1, -1, 1, -1, -1, -1, 1, 1, -1, 1, 1, 1, 1, -1, 1, 1, -1, -1};
std::vector<GLfloat> sqColor = {0.2472,0.24,0.6,0.6,0.24,0.442893,0.6,0.547014,0.24,0.24,0.6,0.33692,0.24,0.353173,0.6,0.6,0.24,0.563266,0.6,0.426641,0.24,0.263452,0.6,0.24};
const float lattice = 5;
const int mxn = 10;
std::vector<GLfloat> v1 = {lattice,-1,0};
std::vector<GLfloat> v2 = {1,lattice,0};
std::vector<GLfloat> v3 = {0,0,lattice};
std::vector<GLfloat> offset = {0,0,-distanceFromOrigin};
std::vector<GLfloat> latticePoints,sqVertexGrid,sqColorGrid;// = {0,0,0};
std::vector<GLuint> sqIndiceGrid;
// Looping stuff to generate the full grid of "instances" to render in a single call.
int instanceCount=0;
//Generate Lattice vectors, aswell as a vector containing the full grids of indices,vertexes and colors
for(int x=-mxn;x<mxn;++x){
for(int y=-mxn;y<mxn;++y){
for(int z=-mxn;z<mxn;++z){
for(int n=0;n<3;++n){
latticePoints.push_back( x*v1[n]+y*v2[n]+z*v3[n]+offset[n] );
};
for(int elm=0;elm<sqVertex.size();elm+=3){
for(int n=0;n<3;++n){
sqVertexGrid.push_back(sqVertex[elm+n]+x*v1[n]+y*v2[n]+z*v3[n]+offset[n]);
sqColorGrid.push_back(sqColor[elm+n]);
};
};
for(int elm=0;elm<sqIndice.size();++elm){
sqIndiceGrid.push_back(sqIndice[elm]+instanceCount*sqVertex.size()/3);
};
++instanceCount;glewInit
};
};
};
#if Instanced==true
//Initialize and fill vertex,color and indice buffers with the relevant data.
GLuint cubeVAO;
glGenVertexArrays(1, &cubeVAO);
glBindVertexArray(cubeVAO);
glEnable(GL_DEPTH_TEST);
//Vertex buffer
GLuint vertexBuffer;
glGenBuffers(1, &vertexBuffer);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glBufferData(GL_ARRAY_BUFFER, sqVertex.size()*sizeof(GLfloat), &sqVertex[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0,3,GL_FLOAT,GL_FALSE,0,(void*)0);
//Color buffer
GLuint colorBuffer;
glGenBuffers(1, &colorBuffer);
glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
glBufferData(GL_ARRAY_BUFFER, sqColor.size()*sizeof(GLfloat), &sqColor[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1,3,GL_FLOAT,GL_FALSE,0,(void*)0);
// Indice buffer
GLuint indicesBuffer;
glGenBuffers(1, &indicesBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indicesBuffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sqIndice.size()*sizeof(GLuint), &sqIndice[0], GL_STATIC_DRAW);
//Lattice point buffer
GLuint latticePointBuffer;
glGenBuffers(1, &latticePointBuffer);
glBindBuffer(GL_ARRAY_BUFFER, latticePointBuffer);
glBufferData(GL_ARRAY_BUFFER, latticePoints.size()*sizeof(GLfloat), &latticePoints[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(2);
glVertexAttribPointer(2,3,GL_FLOAT,GL_FALSE,0,(void*)0);
glVertexAttribDivisor(2,1);
glBindVertexArray(0);
#elif Instanced==false
GLuint cubeGridVAO;
glGenVertexArrays(1, &cubeGridVAO);
glBindVertexArray(cubeGridVAO);
glEnable(GL_DEPTH_TEST);
//Vertex buffer
GLuint vertexBuffer;
glGenBuffers(1, &vertexBuffer);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glBufferData(GL_ARRAY_BUFFER, sqVertexGrid.size()*sizeof(GLfloat), &sqVertexGrid[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0,3,GL_FLOAT,GL_FALSE,0,(void*)0);
//Color buffer
GLuint colorBuffer;
glGenBuffers(1, &colorBuffer);
glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
glBufferData(GL_ARRAY_BUFFER, sqColorGrid.size()*sizeof(GLfloat), &sqColorGrid[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1,3,GL_FLOAT,GL_FALSE,0,(void*)0);
// Indice buffer
GLuint indicesBuffer;
glGenBuffers(1, &indicesBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indicesBuffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sqIndiceGrid.size()*sizeof(GLuint), &sqIndiceGrid[0], GL_STATIC_DRAW);
glBindVertexArray(0);
#endif
while(running)
{
glfwGetFramebufferSize(windowRef, &width, &height);
height = height > 0 ? height : 1;
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
Projection = glm::perspective(65.0f, (float)(width)/height, 0.1f, 300.0f);
View = glm::lookAt( glm::vec3(0.0f,0.0f,-(distanceFromOrigin+3.8f)),
glm::vec3(0.0f,0.0f,100.0f),
glm::vec3(0.0f,1.0f,0.0f));
Model = glm::rotate(glm::mat4(1.0f), 0.0f, glm::vec3(0.25f, 1.0f,0.75f));
MVP = Projection*View*Model;
glUniformMatrix4fv(MatrixID, 1, GL_FALSE, glm::value_ptr(MVP));
#if Instanced==true
glBindVertexArray(cubeVAO);
glDrawElementsInstanced(GL_TRIANGLES, sqIndice.size(),GL_UNSIGNED_INT,(GLvoid*)(0),latticePoints.size());
#elif Instanced==false
glBindVertexArray(cubeGridVAO);
glDrawElements(GL_TRIANGLES, sqIndiceGrid.size(),GL_UNSIGNED_INT,(GLvoid*)(0));
#endif
glfwPollEvents();
glfwSwapBuffers(windowRef);
std::cout<<".\n";
running = !glfwGetKey(windowRef,GLFW_KEY_ESCAPE) && !glfwWindowShouldClose(windowRef);
}
glfwDestroyWindow(windowRef);
glfwTerminate();
return 0;
};
GLuint LoadShaders(const char * vertex_file_path,const char * fragment_file_path){
// Create the shaders
GLuint VertexShaderID = glCreateShader(GL_VERTEX_SHADER);
GLuint FragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER);
// Read the Vertex Shader code from the file
std::string VertexShaderCode;
std::ifstream VertexShaderStream(vertex_file_path, std::ios::in);
if(VertexShaderStream.is_open()){
std::string Line = "";
while(getline(VertexShaderStream, Line))
VertexShaderCode += "\n" + Line;
VertexShaderStream.close();
}else{
printf("Impossible to open %s. Are you in the right directory?\n", vertex_file_path);
return 0;
}
// Read the Fragment Shader code from the file
std::string FragmentShaderCode;
std::ifstream FragmentShaderStream(fragment_file_path, std::ios::in);
if(FragmentShaderStream.is_open()){
std::string Line = "";
while(getline(FragmentShaderStream, Line))
FragmentShaderCode += "\n" + Line;
FragmentShaderStream.close();
}
GLint Result = GL_FALSE;
int InfoLogLength;
// Compile Vertex Shader
printf("Compiling shader : %s\n", vertex_file_path);
char const * VertexSourcePointer = VertexShaderCode.c_str();
glShaderSource(VertexShaderID, 1, &VertexSourcePointer , NULL);
glCompileShader(VertexShaderID);
// Check Vertex Shader
glGetShaderiv(VertexShaderID, GL_COMPILE_STATUS, &Result);
glGetShaderiv(VertexShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength);
if ( InfoLogLength > 0 ){
std::vector<char> VertexShaderErrorMessage(InfoLogLength+1);
glGetShaderInfoLog(VertexShaderID, InfoLogLength, NULL, &VertexShaderErrorMessage[0]);
printf("%s\n", &VertexShaderErrorMessage[0]);
}
// Compile Fragment Shader
printf("Compiling shader : %s\n", fragment_file_path);
char const * FragmentSourcePointer = FragmentShaderCode.c_str();
glShaderSource(FragmentShaderID, 1, &FragmentSourcePointer , NULL);
glCompileShader(FragmentShaderID);
// Check Fragment Shader
glGetShaderiv(FragmentShaderID, GL_COMPILE_STATUS, &Result);
glGetShaderiv(FragmentShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength);
if ( InfoLogLength > 0 ){
std::vector<char> FragmentShaderErrorMessage(InfoLogLength+1);
glGetShaderInfoLog(FragmentShaderID, InfoLogLength, NULL, &FragmentShaderErrorMessage[0]);
printf("%s\n", &FragmentShaderErrorMessage[0]);
}
// Link the program
printf("Linking program\n");
GLuint ProgramID = glCreateProgram();
glAttachShader(ProgramID, VertexShaderID);
glAttachShader(ProgramID, FragmentShaderID);
glLinkProgram(ProgramID);
// Check the program
glGetProgramiv(ProgramID, GL_LINK_STATUS, &Result);
glGetProgramiv(ProgramID, GL_INFO_LOG_LENGTH, &InfoLogLength);
if ( InfoLogLength > 0 ){
std::vector<char> ProgramErrorMessage(InfoLogLength+1);
glGetProgramInfoLog(ProgramID, InfoLogLength, NULL, &ProgramErrorMessage[0]);
printf("%s\n", &ProgramErrorMessage[0]);
}
glDeleteShader(VertexShaderID);
glDeleteShader(FragmentShaderID);
return ProgramID;
}
Run Code Online (Sandbox Code Playgroud)
好的,深呼吸并坐下:你的问题是显卡内存速度.
但是你可以通过修复这个bug来简化GPU:
glDrawElementsInstanced(GL_TRIANGLES, sqIndice.size(),GL_UNSIGNED_INT,(GLvoid*)(0),latticePoints.size());
Run Code Online (Sandbox Code Playgroud)
glDrawElementsInstanced期望绘制的实例数作为最后一个参数.但是你传递了元素的数量latticePoints.这是实例数量的3倍.这导致着色器内部出现零点(因为阻止了越界访问).因此,16000个立方体不会被翻译并被绘制在相同的位置.这导致在立方体的正面上涂刷16000次.深度缓冲区不会阻止这种情况,因为面部不会相互隐藏,它们位于同一位置.
因此,当你distanceFromOrigin减少16000个中心立方体越来越大.OpenGL必须绘制越来越多的像素.还有更多,确切地说.它必须绘制得如此之多,以至于它达到了显卡内存的速度限制.
阅读诊断整个故事的OpenGl性能问题.
| 归档时间: |
|
| 查看次数: |
2262 次 |
| 最近记录: |