Is the render target view the only way to output data from a pixel shader in DirectX?

Purpose: I want to render an image to the screen and also save it to my disk.

Description: I have a render target view. I have an input shader resource view with its texture (D3D11_USAGE_DYNAMIC). I have an output shader resource view with its texture (D3D11_USAGE_DEFAULT). I have an auxiliary plain texture (D3D11_USAGE_STAGING).

The execution path is the following:

  • Read the input image into a texture.
  • Bind the input texture view, the output texture view, the pixel shader, the sampler and the vertex shader.
  • Run the draw command.
  • Copy the output texture to the auxiliary texture.
  • Save the auxiliary texture to an image file. The resulting image is empty.

Question: How can I write to an additional output texture while still rendering to the screen?

Example code

    // Excerpt from a setup routine: creates the input, GPU output and CPU
    // read-back textures plus their views. `result`, `engine`, `width` and
    // `height` are defined outside this excerpt.
    // Every creation call below is followed by the same check: a negative
    // result is treated as failure and reported to the caller as -1.
    mWidth = width;
    mHeight = height;

    // Input texture: R8G8B8A8_UNORM, dynamic usage with CPU write access,
    // bindable as a shader resource.
    D3D11_TEXTURE2D_DESC inputImageDesc;
    ZeroMemory(&inputImageDesc, sizeof(D3D11_TEXTURE2D_DESC));
    inputImageDesc.ArraySize = 1;
    inputImageDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
    inputImageDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; // Needed for CPU write and GPU read
    inputImageDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    inputImageDesc.Width = width;
    inputImageDesc.Height = height;
    inputImageDesc.MipLevels = 1;
    inputImageDesc.SampleDesc.Count = 1;
    inputImageDesc.SampleDesc.Quality = 0;
    inputImageDesc.Usage = D3D11_USAGE_DYNAMIC; // Needed for CPU write and GPU read

    // No initial data is supplied (nullptr).
    result = engine.device()->CreateTexture2D(&inputImageDesc, nullptr, mInputTexture.GetAddressOf());
    if(result < 0)
        return -1;

    // Shader resource view over the input texture, using the default view description (nullptr).
    result = engine.device()->CreateShaderResourceView(mInputTexture.Get(), nullptr, mInputView.GetAddressOf());
    if(result < 0)
        return -1;

    // Output texture: R8G8B8A8_UNORM, default usage, no CPU access.
    // NOTE(review): BindFlags is never assigned for this descriptor, so it
    // stays 0 after ZeroMemory. A shader resource view is created on this
    // texture below and the shader declares it as RWTexture2D; presumably a
    // matching bind flag (e.g. D3D11_BIND_UNORDERED_ACCESS) is required --
    // confirm against the D3D11 documentation.
    D3D11_TEXTURE2D_DESC gpuImageDesc;
    ZeroMemory(&gpuImageDesc, sizeof(D3D11_TEXTURE2D_DESC));
    gpuImageDesc.ArraySize = 1;
    gpuImageDesc.CPUAccessFlags = 0; // Needed for GPU read/write (no CPU access)
    gpuImageDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    gpuImageDesc.Width = width;
    gpuImageDesc.Height = height;
    gpuImageDesc.MipLevels = 1;
    gpuImageDesc.SampleDesc.Count = 1;
    gpuImageDesc.SampleDesc.Quality = 0;
    gpuImageDesc.Usage = D3D11_USAGE_DEFAULT; // Needed for GPU read/write (no CPU access)

    result = engine.device()->CreateTexture2D(&gpuImageDesc, nullptr, mOutputGpuTexture.GetAddressOf());
    if(result < 0)
        return -1;

    // View over the output texture; note that this is a shader resource view.
    result = engine.device()->CreateShaderResourceView(mOutputGpuTexture.Get(), nullptr, mOutputView.GetAddressOf());
    if(result < 0)
        return -1;

    // CPU read-back texture: staging usage with CPU read access and no bind flags.
    // NOTE(review): only BindFlags, MiscFlags, CPUAccessFlags and Usage are
    // set here; Width, Height, Format, MipLevels, ArraySize and SampleDesc
    // remain zero from ZeroMemory. Presumably they must match the output
    // texture for the later CopyResource to work -- confirm.
    D3D11_TEXTURE2D_DESC cpuImageDesc;
    ZeroMemory(&cpuImageDesc, sizeof(D3D11_TEXTURE2D_DESC));
    cpuImageDesc.BindFlags       = 0;
    cpuImageDesc.MiscFlags       = 0;
    cpuImageDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; // Needed for CPU read
    cpuImageDesc.Usage = D3D11_USAGE_STAGING; // Needed for CPU read

    result = engine.device()->CreateTexture2D(&cpuImageDesc, nullptr, mOutputCpuTexture.GetAddressOf());
    if(result < 0)
        return -1;
// Input of the pixel shader entry point `main`.
// NOTE(review): the braces enclosing the members appear to be missing from this excerpt.
struct PixelInput
    // Position, bound to the SV_POSITION semantic.
    float4 position : SV_POSITION;
    // Color, bound to the COLOR semantic; not read by `main` in this excerpt.
    float4 color : COLOR;
    // Coordinate used by `main` to sample gInputTexture.
    float2 coord : TEXCOORDIN;
    // Coordinate used by `main` to index gOutputTexture.
    float2 coordOut : TEXCOORDOUT;

// Input image (register t0) and the sampler used to read it (register s0).
Texture2D<float4> gInputTexture : register(t0);
SamplerState gSampleType : register(s0);
// Writable side-output texture.
// NOTE(review): declared on register t1; read/write resources are presumably
// expected on a u# register -- confirm against the HLSL register documentation.
RWTexture2D<float4> gOutputTexture : register(t1);

// Pixel shader entry point: writes opaque red into gOutputTexture at
// input.coordOut and returns the sampled input color with alpha 1.0.
// NOTE(review): the braces of the function body appear to be missing from this excerpt.
float4 main(PixelInput input) : SV_TARGET
    // Side output: store opaque red at the position given by coordOut.
    // NOTE(review): coordOut is a float2 used as a texture index; presumably
    // integer texel coordinates are expected here -- confirm.
    gOutputTexture[input.coordOut] = float4(1.0,0.0,0.0,1.0);

    // Start from opaque black, then replace the RGB channels with the sample
    // taken from the input texture at input.coord; alpha stays 1.0.
    float4 inputPixel = float4(0.0, 0.0, 0.0, 1.0);
    inputPixel.rgb = gInputTexture.Sample(gSampleType, input.coord).rgb;
    return inputPixel;

    // Excerpt from the read-back routine: copies the GPU output texture into
    // the CPU texture, maps it and copies it row by row into `dst`.
    // `dst`, `rowPitch` and `height` are defined outside this excerpt.
    engine.context()->CopyResource(mOutputCpuTexture.Get(), mOutputGpuTexture.Get());
    D3D11_MAPPED_SUBRESOURCE mappedImgData;
    ZeroMemory(&mappedImgData, sizeof(D3D11_MAPPED_SUBRESOURCE));
    // Map subresource 0 of the CPU texture for reading.
    int32_t result = engine.context()->Map(mOutputCpuTexture.Get(), 0, D3D11_MAP_READ, 0, &mappedImgData);
    // NOTE(review): the statement guarded by this failure check is missing from the excerpt.
    if(result < EC_SUCCESS)

    // Copy the smaller of the two row sizes so that neither buffer is overrun.
    const uint32_t bytesPerRow = std::min(rowPitch, mappedImgData.RowPitch);

    uint8_t* textureData = reinterpret_cast<uint8_t*>(mappedImgData.pData);
    // NOTE(review): the loop braces appear to be missing from this excerpt;
    // the three statements below presumably form the loop body.
    for(uint32_t i = 0; i < height; ++i)
        memcpy(dst, textureData, bytesPerRow);
        // Advance source and destination by their own row strides.
        textureData += mappedImgData.RowPitch;
        dst += rowPitch;
    // Release the mapping once the copy is done.
    engine.context()->Unmap(mOutputCpuTexture.Get(), 0);

Solution 1:[1]

What I did to fix this was to add a compute shader as an intermediary. That way, I read an RWTexture (DEFAULT) back through another texture (STAGING), and I use one more texture to read the back buffer.


