Roll ANGLE from 1434697fadd3 to 2ecb3217972b (5 revisions) am: 9cdc28b0fc [main-16k]

Original change: https://android-review.googlesource.com/c/platform/external/angle/+/2322713

Change-Id: I1f15d4cb826d551ddcf86fab328fae1de459c2f8
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
author: android-autoroll <android-autoroll@skia-public.iam.gserviceaccount.com> 2022-12-01 01:21:06 +0000
committer: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2022-12-01 01:21:06 +0000
commit: 1d9f62859e5709bee647813285cf42dd253396d6 (patch)
tree: 64414618b227dd73fb7e060051dd4ab89e2e7f8b
parent: 6eeb92d98c8d1db3000b1b9fbfc5663853bfe80c (diff)
parent: 9cdc28b0fc595bd00cc5ca9133c82c065f504611 (diff)
download: angle-main-16k.tar.gz
11 files changed, 494 insertions, 327 deletions
diff --git a/scripts/code_generation_hashes/GL_EGL_entry_points.json b/scripts/code_generation_hashes/GL_EGL_entry_points.json
index ad0797e685..6847d8bdac 100644
--- a/scripts/code_generation_hashes/GL_EGL_entry_points.json
+++ b/scripts/code_generation_hashes/GL_EGL_entry_points.json
@@ -6,7 +6,7 @@
   "scripts/entry_point_packed_gl_enums.json":
     "4d5018871615ae745a34305fbb0f8093",
   "scripts/generate_entry_points.py":
-    "0ea0e8dc52c8d2dd3be1430c36a5ceba",
+    "63df9e5509d9615fb74a916156ae6bfa",
   "scripts/gl_angle_ext.xml":
     "98a3ec3a18a0ea57bca6c8c27c8575cf",
   "scripts/registry_xml.py":
@@ -168,7 +168,7 @@
   "src/libGLESv2/entry_points_gles_3_2_autogen.h":
     "647f932a299cdb4726b60bbba059f0d2",
   "src/libGLESv2/entry_points_gles_ext_autogen.cpp":
-    "37c38ebbfb0ce121b48f8cbbc6300533",
+    "1439ec87cca7cb254bcf4d85a98aa19d",
   "src/libGLESv2/entry_points_gles_ext_autogen.h":
     "b81005771e30c7c4c81665a33a359856",
   "src/libGLESv2/libGLESv2_autogen.cpp":
diff --git a/scripts/generate_entry_points.py b/scripts/generate_entry_points.py
index bfe79fb351..a91cd6de6a 100755
--- a/scripts/generate_entry_points.py
+++ b/scripts/generate_entry_points.py
@@ -227,7 +227,7 @@ TEMPLATE_ENTRY_POINT_DECL = """{angle_export}{return_type} {export_def} {name}({
 
 TEMPLATE_GLES_ENTRY_POINT_NO_RETURN = """\
 void GL_APIENTRY GL_{name}({params})
-{{
+{{{optional_gl_entry_point_locks}
     Context *context = {context_getter};
     {event_comment}EVENT(context, GL{name}, "context = %d{comma_if_needed}{format_params}", CID(context){comma_if_needed}{pass_params});
 
@@ -250,7 +250,7 @@ void GL_APIENTRY GL_{name}({params})
 
 TEMPLATE_GLES_ENTRY_POINT_WITH_RETURN = """\
 {return_type} GL_APIENTRY GL_{name}({params})
-{{
+{{{optional_gl_entry_point_locks}
     Context *context = {context_getter};
     {event_comment}EVENT(context, GL{name}, "context = %d{comma_if_needed}{format_params}", CID(context){comma_if_needed}{pass_params});
 
@@ -1679,6 +1679,8 @@ def format_entry_point_def(api, command_node, cmd_name, proto, params, cmd_packe
             get_egl_entry_point_labeled_object(ep_to_object, cmd_name, params, packed_enums),
         "entry_point_locks":
             get_locks(api, cmd_name, params),
+        "optional_gl_entry_point_locks":
+            get_optional_gl_locks(api, cmd_name, params),
         "preamble":
             get_preamble(api, cmd_name, params)
     }
@@ -2681,6 +2683,18 @@ def get_locks(api, cmd_name, params):
     return ordered_lock_statements(LOCK_GLOBAL)
 
 
+def get_optional_gl_locks(api, cmd_name, params):
+    if api != apis.GLES:
+        return ""
+
+    # EGLImage-related commands need to access the EGLImage and the Display, which
+    # should be protected by the global lock.
+    if not cmd_name.startswith("glEGLImage"):
+        return ""
+
+    return ordered_lock_statements(LOCK_GLOBAL)
+
+
 def get_prepare_swap_buffers_call(api, cmd_name, params):
     if cmd_name not in [
             "eglSwapBuffers", "eglSwapBuffersWithDamageKHR", "eglSwapBuffersWithFrameTokenANGLE"
diff --git a/src/libANGLE/renderer/vulkan/CommandProcessor.cpp b/src/libANGLE/renderer/vulkan/CommandProcessor.cpp
index 5e2d53a305..81cc0c56a0 100644
--- a/src/libANGLE/renderer/vulkan/CommandProcessor.cpp
+++ b/src/libANGLE/renderer/vulkan/CommandProcessor.cpp
@@ -26,7 +26,7 @@ void InitializeSubmitInfo(VkSubmitInfo *submitInfo,
                           const PrimaryCommandBuffer &commandBuffer,
                           const std::vector<VkSemaphore> &waitSemaphores,
                           const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
-                          const Semaphore *signalSemaphore)
+                          const VkSemaphore &signalSemaphore)
 {
     // Verify that the submitInfo has been zero'd out.
     ASSERT(submitInfo->signalSemaphoreCount == 0);
@@ -38,10 +38,10 @@ void InitializeSubmitInfo(VkSubmitInfo *submitInfo,
     submitInfo->pWaitSemaphores    = waitSemaphores.empty() ? nullptr : waitSemaphores.data();
     submitInfo->pWaitDstStageMask  = waitSemaphoreStageMasks.data();
 
-    if (signalSemaphore)
+    if (signalSemaphore != VK_NULL_HANDLE)
     {
         submitInfo->signalSemaphoreCount = 1;
-        submitInfo->pSignalSemaphores    = signalSemaphore->ptr();
+        submitInfo->pSignalSemaphores    = &signalSemaphore;
     }
 }
 
@@ -153,7 +153,7 @@ void CommandProcessorTask::initTask()
     mOutsideRenderPassCommandBuffer = nullptr;
     mRenderPassCommandBuffer        = nullptr;
     mRenderPass                     = nullptr;
-    mSemaphore                      = nullptr;
+    mSemaphore                      = VK_NULL_HANDLE;
     mCommandPools                   = nullptr;
     mOneOffWaitSemaphore            = nullptr;
     mOneOffWaitSemaphoreStageMask   = 0;
@@ -277,7 +277,7 @@ void CommandProcessorTask::initWaitIdle()
 void CommandProcessorTask::initFlushAndQueueSubmit(
     const std::vector<VkSemaphore> &waitSemaphores,
     const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
-    const Semaphore *semaphore,
+    const VkSemaphore semaphore,
     bool hasProtectedContent,
     egl::ContextPriority priority,
     SecondaryCommandPools *commandPools,
@@ -744,7 +744,7 @@ angle::Result CommandProcessor::submitCommands(
     egl::ContextPriority priority,
     const std::vector<VkSemaphore> &waitSemaphores,
     const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
-    const Semaphore *signalSemaphore,
+    const VkSemaphore signalSemaphore,
     GarbageList &&currentGarbage,
     SecondaryCommandBufferList &&commandBuffersToReset,
     SecondaryCommandPools *commandPools,
@@ -1166,7 +1166,7 @@ angle::Result CommandQueue::submitCommands(
     egl::ContextPriority priority,
     const std::vector<VkSemaphore> &waitSemaphores,
     const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
-    const Semaphore *signalSemaphore,
+    const VkSemaphore signalSemaphore,
     GarbageList &&currentGarbage,
     SecondaryCommandBufferList &&commandBuffersToReset,
     SecondaryCommandPools *commandPools,
@@ -1188,7 +1188,7 @@ angle::Result CommandQueue::submitCommands(
     // Don't make a submission if there is nothing to submit.
     PrimaryCommandBuffer &commandBuffer = getCommandBuffer(hasProtectedContent);
     const bool hasAnyPendingCommands    = commandBuffer.valid();
-    if (hasAnyPendingCommands || signalSemaphore != nullptr || !waitSemaphores.empty())
+    if (hasAnyPendingCommands || signalSemaphore != VK_NULL_HANDLE || !waitSemaphores.empty())
     {
         if (commandBuffer.valid())
         {
diff --git a/src/libANGLE/renderer/vulkan/CommandProcessor.h b/src/libANGLE/renderer/vulkan/CommandProcessor.h
index b65d9147e5..d785721280 100644
--- a/src/libANGLE/renderer/vulkan/CommandProcessor.h
+++ b/src/libANGLE/renderer/vulkan/CommandProcessor.h
@@ -101,7 +101,7 @@ class CommandProcessorTask
 
     void initFlushAndQueueSubmit(const std::vector<VkSemaphore> &waitSemaphores,
                                  const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
-                                 const Semaphore *semaphore,
+                                 const VkSemaphore semaphore,
                                  bool hasProtectedContent,
                                  egl::ContextPriority priority,
                                  SecondaryCommandPools *commandPools,
@@ -132,7 +132,7 @@ class CommandProcessorTask
     {
         return mWaitSemaphoreStageMasks;
     }
-    const Semaphore *getSemaphore() { return mSemaphore; }
+    VkSemaphore getSemaphore() { return mSemaphore; }
     GarbageList &getGarbage() { return mGarbage; }
     SecondaryCommandBufferList &&getCommandBuffersToReset()
     {
@@ -169,7 +169,7 @@ class CommandProcessorTask
     // Flush data
     std::vector<VkSemaphore> mWaitSemaphores;
     std::vector<VkPipelineStageFlags> mWaitSemaphoreStageMasks;
-    const Semaphore *mSemaphore;
+    VkSemaphore mSemaphore;
     SecondaryCommandPools *mCommandPools;
     GarbageList mGarbage;
     SecondaryCommandBufferList mCommandBuffersToReset;
@@ -306,7 +306,7 @@ class CommandQueueInterface : angle::NonCopyable
         egl::ContextPriority priority,
         const std::vector<VkSemaphore> &waitSemaphores,
         const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
-        const Semaphore *signalSemaphore,
+        const VkSemaphore signalSemaphore,
         GarbageList &&currentGarbage,
         SecondaryCommandBufferList &&commandBuffersToReset,
         SecondaryCommandPools *commandPools,
@@ -377,7 +377,7 @@ class CommandQueue final : public CommandQueueInterface
                                  egl::ContextPriority priority,
                                  const std::vector<VkSemaphore> &waitSemaphores,
                                  const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
-                                 const Semaphore *signalSemaphore,
+                                 const VkSemaphore signalSemaphore,
                                  GarbageList &&currentGarbage,
                                  SecondaryCommandBufferList &&commandBuffersToReset,
                                  SecondaryCommandPools *commandPools,
@@ -544,7 +544,7 @@ class CommandProcessor final : public Context, public CommandQueueInterface
                                  egl::ContextPriority priority,
                                  const std::vector<VkSemaphore> &waitSemaphores,
                                  const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
-                                 const Semaphore *signalSemaphore,
+                                 const VkSemaphore signalSemaphore,
                                  GarbageList &&currentGarbage,
                                  SecondaryCommandBufferList &&commandBuffersToReset,
                                  SecondaryCommandPools *commandPools,
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.cpp b/src/libANGLE/renderer/vulkan/ContextVk.cpp
index faa169b379..79822c455c 100644
--- a/src/libANGLE/renderer/vulkan/ContextVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp
@@ -2073,12 +2073,15 @@ angle::Result ContextVk::handleDirtyGraphicsReadOnlyDepthFeedbackLoopMode(
 angle::Result ContextVk::handleDirtyAnySamplePassedQueryEnd(DirtyBits::Iterator *dirtyBitsIterator,
                                                             DirtyBits dirtyBitMask)
 {
-    // When we switch from query enabled draw to query disabled draw, we do immediate flush to
-    // ensure the query result will be ready early so that application thread calling getQueryResult
-    // gets unblocked sooner.
-    dirtyBitsIterator->setLaterBit(DIRTY_BIT_RENDER_PASS);
-    mHasDeferredFlush = true;
+    if (mRenderPassCommands->started())
+    {
+        // When we switch from query enabled draw to query disabled draw, we do immediate flush to
+        // ensure the query result will be ready early so that application thread calling
+        // getQueryResult gets unblocked sooner.
+        dirtyBitsIterator->setLaterBit(DIRTY_BIT_RENDER_PASS);
 
+        mHasDeferredFlush = true;
+    }
     return angle::Result::Continue;
 }
 
@@ -5186,7 +5189,8 @@ angle::Result ContextVk::syncState(const gl::Context *context,
                 // open, such as invalidate or blit. Note that we always start a new command buffer
                 // because we currently can only support one open RenderPass at a time.
                 onRenderPassFinished(RenderPassClosureReason::FramebufferBindingChange);
-                if (getFeatures().preferSubmitAtFBOBoundary.enabled)
+                if (getFeatures().preferSubmitAtFBOBoundary.enabled &&
+                    mRenderPassCommands->started())
                 {
                     // This will behave as if user called glFlush, but the actual flush will be
                     // triggered at endRenderPass time.
@@ -7124,6 +7128,11 @@ angle::Result ContextVk::flushCommandsAndEndRenderPassWithoutSubmit(RenderPassCl
 
 angle::Result ContextVk::flushCommandsAndEndRenderPass(RenderPassClosureReason reason)
 {
+    // The main reason we have mHasDeferredFlush is to avoid breaking the render pass just because
+    // we want to issue a flush.  So there must be a started render pass if it is true; otherwise
+    // we should issue a flushImpl immediately instead of setting mHasDeferredFlush to true.
+    ASSERT(!mHasDeferredFlush || mRenderPassCommands->started());
+
     ANGLE_TRY(flushCommandsAndEndRenderPassWithoutSubmit(reason));
 
     if (mHasDeferredFlush)
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp
index cf526aa9aa..f4b44a78ea 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@@ -3886,7 +3886,7 @@ void RendererVk::initFeatures(DisplayVk *displayVk,
     // Testing shows that on ARM GPU, doing implicit flush at framebuffer boundary improves
     // performance. Most app traces shows frame time reduced and manhattan 3.1 offscreen score
     // improves 7%.
-    ANGLE_FEATURE_CONDITION(&mFeatures, preferSubmitAtFBOBoundary, isARM);
+    ANGLE_FEATURE_CONDITION(&mFeatures, preferSubmitAtFBOBoundary, isARM || isSwiftShader);
 
     // In order to support immutable samplers tied to external formats, we need to overallocate
     // descriptor counts for such immutable samplers
@@ -4807,13 +4807,16 @@ angle::Result RendererVk::submitCommands(
         std::move(mRenderPassCommandBufferRecycler.releaseCommandBuffersToReset()),
     };
 
+    const VkSemaphore signalVkSemaphore =
+        signalSemaphore ? signalSemaphore->getHandle() : VK_NULL_HANDLE;
+
     if (isAsyncCommandQueueEnabled())
     {
         *submitSerialOut = mCommandProcessor.reserveSubmitSerial();
 
         ANGLE_TRY(mCommandProcessor.submitCommands(
             context, hasProtectedContent, contextPriority, waitSemaphores, waitSemaphoreStageMasks,
-            signalSemaphore, std::move(currentGarbage), std::move(commandBuffersToReset),
+            signalVkSemaphore, std::move(currentGarbage), std::move(commandBuffersToReset),
             commandPools, *submitSerialOut));
     }
     else
@@ -4822,7 +4825,7 @@ angle::Result RendererVk::submitCommands(
 
         ANGLE_TRY(mCommandQueue.submitCommands(
             context, hasProtectedContent, contextPriority, waitSemaphores, waitSemaphoreStageMasks,
-            signalSemaphore, std::move(currentGarbage), std::move(commandBuffersToReset),
+            signalVkSemaphore, std::move(currentGarbage), std::move(commandBuffersToReset),
             commandPools, *submitSerialOut));
     }
 
diff --git a/src/libANGLE/renderer/vulkan/SurfaceVk.cpp b/src/libANGLE/renderer/vulkan/SurfaceVk.cpp
index 2a3a5f87c5..f418ad61e2 100644
--- a/src/libANGLE/renderer/vulkan/SurfaceVk.cpp
+++ b/src/libANGLE/renderer/vulkan/SurfaceVk.cpp
@@ -30,10 +30,14 @@ namespace
 {
 angle::SubjectIndex kAnySurfaceImageSubjectIndex = 0;
 
-// Special value for currentExtent if surface size is determined by the
-// swapchain's extent. See VkSurfaceCapabilitiesKHR spec for more details.
+// Special value for currentExtent if surface size is determined by the swapchain's extent.  See
+// the VkSurfaceCapabilitiesKHR spec for more details.
 constexpr uint32_t kSurfaceSizedBySwapchain = 0xFFFFFFFFu;
 
+// Special value for ImagePresentOperation::imageIndex meaning that it corresponds to an older
+// swapchain present operation and so the index is no longer relevant.
+constexpr uint32_t kInvalidImageIndex = std::numeric_limits<uint32_t>::max();
+
 GLint GetSampleCount(const egl::Config *config)
 {
     GLint samples = 1;
@@ -349,6 +353,93 @@ angle::Result GetPresentModes(DisplayVk *displayVk,
     return angle::Result::Continue;
 }
 
+angle::Result NewSemaphore(vk::Context *context,
+                           vk::Recycler<vk::Semaphore> *semaphoreRecycler,
+                           vk::Semaphore *semaphoreOut)
+{
+    if (semaphoreRecycler->empty())
+    {
+        ANGLE_VK_TRY(context, semaphoreOut->init(context->getDevice()));
+    }
+    else
+    {
+        semaphoreRecycler->fetch(semaphoreOut);
+    }
+    return angle::Result::Continue;
+}
+
+VkResult NewFence(vk::Context *context, vk::Recycler<vk::Fence> *fenceRecycler, vk::Fence *fenceOut)
+{
+    VkResult result = VK_SUCCESS;
+    if (fenceRecycler->empty())
+    {
+        VkFenceCreateInfo fenceCreateInfo = {};
+        fenceCreateInfo.sType             = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+        fenceCreateInfo.flags             = 0;
+        result                            = fenceOut->init(context->getDevice(), fenceCreateInfo);
+    }
+    else
+    {
+        fenceRecycler->fetch(fenceOut);
+        result = fenceOut->reset(context->getDevice());
+        if (result != VK_SUCCESS)
+        {
+            fenceRecycler->recycle(std::move(*fenceOut));
+        }
+    }
+    return result;
+}
+
+void AssociateFenceWithPresentHistory(uint32_t imageIndex,
+                                      vk::Fence &&presentFence,
+                                      std::deque<impl::ImagePresentOperation> *presentHistory)
+{
+    // The history looks like this:
+    //
+    // <entries for old swapchains, imageIndex == UINT32_MAX> <entries for this swapchain>
+    //
+    // Walk the list backwards and find the entry for the given image index.  That's the last
+    // present with that image.  Associate the fence with that present operation.
+    for (size_t historyIndex = 0; historyIndex < presentHistory->size(); ++historyIndex)
+    {
+        impl::ImagePresentOperation &presentOperation =
+            (*presentHistory)[presentHistory->size() - historyIndex - 1];
+        if (presentOperation.imageIndex == kInvalidImageIndex)
+        {
+            // No previous presentation with this index.
+            break;
+        }
+
+        if (presentOperation.imageIndex == imageIndex)
+        {
+            ASSERT(!presentOperation.fence.valid());
+            presentOperation.fence = std::move(presentFence);
+            return;
+        }
+    }
+
+    // If no previous presentation with this index, add an empty entry just so the fence can be
+    // cleaned up.
+    presentHistory->emplace_back();
+    presentHistory->back().fence      = std::move(presentFence);
+    presentHistory->back().imageIndex = imageIndex;
+}
+
+bool HasAnyOldSwapchains(const std::deque<impl::ImagePresentOperation> &presentHistory)
+{
+    // Used to validate that swapchain clean up data can only be carried by the first present
+    // operation of a swapchain.  That operation is already removed from history when this call is
+    // made, so this verifies that no clean up data exists in the history.
+    for (const impl::ImagePresentOperation &presentOperation : presentHistory)
+    {
+        if (!presentOperation.oldSwapchains.empty())
+        {
+            return true;
+        }
+    }
+
+    return false;
+}
 }  // namespace
 
 SurfaceVk::SurfaceVk(const egl::SurfaceState &surfaceState) : SurfaceImpl(surfaceState) {}
@@ -681,24 +772,51 @@ void SwapchainCleanupData::destroy(VkDevice device, vk::Recycler<vk::Semaphore>
     semaphores.clear();
 }
 
-ImagePresentHistory::ImagePresentHistory() = default;
-ImagePresentHistory::~ImagePresentHistory()
+ImagePresentOperation::ImagePresentOperation() : imageIndex(kInvalidImageIndex) {}
+ImagePresentOperation::~ImagePresentOperation()
 {
+    ASSERT(!fence.valid());
     ASSERT(!semaphore.valid());
     ASSERT(oldSwapchains.empty());
 }
 
-ImagePresentHistory::ImagePresentHistory(ImagePresentHistory &&other)
-    : semaphore(std::move(other.semaphore)), oldSwapchains(std::move(other.oldSwapchains))
+ImagePresentOperation::ImagePresentOperation(ImagePresentOperation &&other)
+    : fence(std::move(other.fence)),
+      semaphore(std::move(other.semaphore)),
+      oldSwapchains(std::move(other.oldSwapchains)),
+      imageIndex(other.imageIndex)
 {}
 
-ImagePresentHistory &ImagePresentHistory::operator=(ImagePresentHistory &&other)
+ImagePresentOperation &ImagePresentOperation::operator=(ImagePresentOperation &&other)
 {
+    std::swap(fence, other.fence);
     std::swap(semaphore, other.semaphore);
     std::swap(oldSwapchains, other.oldSwapchains);
+    std::swap(imageIndex, other.imageIndex);
     return *this;
 }
 
+void ImagePresentOperation::destroy(VkDevice device,
+                                    vk::Recycler<vk::Fence> *fenceRecycler,
+                                    vk::Recycler<vk::Semaphore> *semaphoreRecycler)
+{
+    fenceRecycler->recycle(std::move(fence));
+
+    // On the first acquire of the image, a fence is used but there is no present semaphore to clean
+    // up.  That fence is placed in the present history just for clean up purposes.
+    if (semaphore.valid())
+    {
+        semaphoreRecycler->recycle(std::move(semaphore));
+    }
+
+    // Destroy old swapchains
+    for (SwapchainCleanupData &oldSwapchain : oldSwapchains)
+    {
+        oldSwapchain.destroy(device, semaphoreRecycler);
+    }
+    oldSwapchains.clear();
+}
+
 SwapchainImage::SwapchainImage()  = default;
 SwapchainImage::~SwapchainImage() = default;
 
@@ -707,8 +825,7 @@ SwapchainImage::SwapchainImage(SwapchainImage &&other)
       imageViews(std::move(other.imageViews)),
       framebuffer(std::move(other.framebuffer)),
       fetchFramebuffer(std::move(other.fetchFramebuffer)),
-      framebufferResolveMS(std::move(other.framebufferResolveMS)),
-      presentHistory(std::move(other.presentHistory))
+      framebufferResolveMS(std::move(other.framebufferResolveMS))
 {}
 }  // namespace impl
 
@@ -765,6 +882,16 @@ void WindowSurfaceVk::destroy(const egl::Display *display)
         mLockBufferHelper.destroy(renderer);
     }
 
+    for (impl::ImagePresentOperation &presentOperation : mPresentHistory)
+    {
+        if (presentOperation.fence.valid())
+        {
+            (void)presentOperation.fence.wait(device, renderer->getMaxFenceWaitTimeNs());
+        }
+        presentOperation.destroy(device, &mPresentFenceRecycler, &mPresentSemaphoreRecycler);
+    }
+    mPresentHistory.clear();
+
     destroySwapChainImages(displayVk);
 
     if (mSwapchain)
@@ -790,6 +917,7 @@ void WindowSurfaceVk::destroy(const egl::Display *display)
     }
 
     mPresentSemaphoreRecycler.destroy(device);
+    mPresentFenceRecycler.destroy(device);
 
     // Call parent class to destroy any resources parent owns.
     SurfaceVk::destroy(display);
@@ -1054,76 +1182,91 @@ angle::Result WindowSurfaceVk::getAttachmentRenderTarget(const gl::Context *cont
 
 angle::Result WindowSurfaceVk::recreateSwapchain(ContextVk *contextVk, const gl::Extents &extents)
 {
-    // If mOldSwapchains is not empty, it means that a new swapchain was created, but before
-    // any of its images were presented, it's asked to be recreated.  In this case, we can destroy
-    // the current swapchain immediately (although the old swapchains still need to be kept to be
-    // scheduled for destruction).  This can happen for example if vkQueuePresentKHR returns
+    // If no present operation has been done on the new swapchain, it can be destroyed right away.
+    // This means that a new swapchain was created, but before any of its images were presented,
+    // it's asked to be recreated.  This can happen for example if vkQueuePresentKHR returns
     // OUT_OF_DATE, the swapchain is recreated and the following vkAcquireNextImageKHR again
-    // returns OUT_OF_DATE.
+    // returns OUT_OF_DATE.  Otherwise, keep the current swapchain as the old swapchain to be
+    // scheduled for destruction.
     //
-    // Otherwise, keep the current swapchain as the old swapchain to be scheduled for destruction
-    // and create a new one.
-
+    // The old(er) swapchains still need to be kept to be scheduled for destruction.
     VkSwapchainKHR swapchainToDestroy = VK_NULL_HANDLE;
 
-    if (!mOldSwapchains.empty())
+    if (mPresentHistory.empty() || mPresentHistory.back().imageIndex == kInvalidImageIndex)
     {
-        // Keep the old swapchain, destroy the current (never-used) swapchain.
+        // Destroy the current (never-used) swapchain.
         swapchainToDestroy = mSwapchain;
+    }
 
-        // Recycle present semaphores.
-        for (SwapchainImage &swapchainImage : mSwapchainImages)
-        {
-            for (ImagePresentHistory &presentHistory : swapchainImage.presentHistory)
-            {
-                ASSERT(presentHistory.semaphore.valid());
-                ASSERT(presentHistory.oldSwapchains.empty());
+    // Place any present operation that's not associated with a fence into mOldSwapchains.  That
+    // gets scheduled for destruction when the semaphore of the first image of the next swapchain
+    // can be recycled.
+    SwapchainCleanupData cleanupData;
 
-                mPresentSemaphoreRecycler.recycle(std::move(presentHistory.semaphore));
-            }
-        }
+    // If the swapchain is not being immediately destroyed, schedule it for destruction.
+    if (swapchainToDestroy == VK_NULL_HANDLE)
+    {
+        cleanupData.swapchain = mSwapchain;
     }
-    else
+
+    std::vector<impl::ImagePresentOperation> historyToKeep;
+    while (!mPresentHistory.empty())
     {
-        SwapchainCleanupData cleanupData;
+        impl::ImagePresentOperation &presentOperation = mPresentHistory.back();
 
-        // Remember the current swapchain to be scheduled for destruction later.
-        cleanupData.swapchain = mSwapchain;
+        // If this is about an older swapchain, let it be.
+        if (presentOperation.imageIndex == kInvalidImageIndex)
+        {
+            ASSERT(presentOperation.fence.valid());
+            break;
+        }
 
-        // Accumulate the semaphores to be destroyed at the same time as the swapchain.
-        for (SwapchainImage &swapchainImage : mSwapchainImages)
+        // Reset the index, so it's not processed in the future.
+        presentOperation.imageIndex = kInvalidImageIndex;
+
+        if (presentOperation.fence.valid())
+        {
+            // If there is already a fence associated with it, let it be cleaned up once the fence
+            // is signaled.
+            historyToKeep.push_back(std::move(presentOperation));
+        }
+        else
         {
-            for (ImagePresentHistory &presentHistory : swapchainImage.presentHistory)
+            ASSERT(presentOperation.semaphore.valid());
+
+            // Otherwise accumulate it in mOldSwapchains.
+            cleanupData.semaphores.emplace_back(std::move(presentOperation.semaphore));
+
+            // Accumulate any previous swapchains that are pending destruction too.
+            for (SwapchainCleanupData &oldSwapchain : presentOperation.oldSwapchains)
             {
-                ASSERT(presentHistory.semaphore.valid());
-                cleanupData.semaphores.emplace_back(std::move(presentHistory.semaphore));
-
-                // Accumulate any previous swapchains that are pending destruction too.
-                for (SwapchainCleanupData &oldSwapchain : presentHistory.oldSwapchains)
-                {
-                    mOldSwapchains.emplace_back(std::move(oldSwapchain));
-                }
-                presentHistory.oldSwapchains.clear();
+                mOldSwapchains.emplace_back(std::move(oldSwapchain));
             }
+            presentOperation.oldSwapchains.clear();
         }
 
-        // If too many old swapchains have accumulated, wait idle and destroy them.  This is to
-        // prevent failures due to too many swapchains allocated.
-        //
-        // Note: Nvidia has been observed to fail creation of swapchains after 20 are allocated on
-        // desktop, or less than 10 on Quadro P400.
-        static constexpr size_t kMaxOldSwapchains = 5;
-        if (mOldSwapchains.size() > kMaxOldSwapchains)
+        mPresentHistory.pop_back();
+    }
+    std::move(historyToKeep.begin(), historyToKeep.end(), std::back_inserter(mPresentHistory));
+
+    // If too many old swapchains have accumulated, wait idle and destroy them.  This is to prevent
+    // failures due to too many swapchains allocated.
+    //
+    // Note: Nvidia has been observed to fail creation of swapchains after 20 are allocated on
+    // desktop, or less than 10 on Quadro P400.
+    static constexpr size_t kMaxOldSwapchains = 5;
+    if (mOldSwapchains.size() > kMaxOldSwapchains)
+    {
+        ANGLE_TRY(contextVk->getRenderer()->finish(contextVk, contextVk->hasProtectedContent()));
+        for (SwapchainCleanupData &oldSwapchain : mOldSwapchains)
         {
-            ANGLE_TRY(
-                contextVk->getRenderer()->finish(contextVk, contextVk->hasProtectedContent()));
-            for (SwapchainCleanupData &oldSwapchain : mOldSwapchains)
-            {
-                oldSwapchain.destroy(contextVk->getDevice(), &mPresentSemaphoreRecycler);
-            }
-            mOldSwapchains.clear();
+            oldSwapchain.destroy(contextVk->getDevice(), &mPresentSemaphoreRecycler);
         }
+        mOldSwapchains.clear();
+    }
 
+    if (cleanupData.swapchain != VK_NULL_HANDLE || !cleanupData.semaphores.empty())
+    {
         mOldSwapchains.emplace_back(std::move(cleanupData));
     }
 
@@ -1156,20 +1299,6 @@ angle::Result WindowSurfaceVk::recreateSwapchain(ContextVk *contextVk, const gl:
     return result;
 }
 
-angle::Result WindowSurfaceVk::newPresentSemaphore(vk::Context *context,
-                                                   vk::Semaphore *semaphoreOut)
-{
-    if (mPresentSemaphoreRecycler.empty())
-    {
-        ANGLE_VK_TRY(context, semaphoreOut->init(context->getDevice()));
-    }
-    else
-    {
-        mPresentSemaphoreRecycler.fetch(semaphoreOut);
-    }
-    return angle::Result::Continue;
-}
-
 angle::Result WindowSurfaceVk::resizeSwapchainImages(vk::Context *context, uint32_t imageCount)
 {
     if (static_cast<size_t>(imageCount) != mSwapchainImages.size())
@@ -1192,18 +1321,6 @@ angle::Result WindowSurfaceVk::resizeSwapchainImages(vk::Context *context, uint3
         }
     }
 
-    // At this point, if there was a previous swapchain, the previous present semaphores have all
-    // been moved to mOldSwapchains to be scheduled for destruction, so all semaphore handles in
-    // mSwapchainImages should be invalid.
-    for (SwapchainImage &swapchainImage : mSwapchainImages)
-    {
-        for (ImagePresentHistory &presentHistory : swapchainImage.presentHistory)
-        {
-            ASSERT(!presentHistory.semaphore.valid());
-            ANGLE_TRY(newPresentSemaphore(context, &presentHistory.semaphore));
-        }
-    }
-
     return angle::Result::Continue;
 }
 
@@ -1506,13 +1623,6 @@ void WindowSurfaceVk::releaseSwapchainImages(ContextVk *contextVk)
         {
             contextVk->addGarbage(&swapchainImage.framebufferResolveMS);
         }
-
-        // present history must have already been taken care of.
-        for (ImagePresentHistory &presentHistory : swapchainImage.presentHistory)
-        {
-            ASSERT(!presentHistory.semaphore.valid());
-            ASSERT(presentHistory.oldSwapchains.empty());
-        }
     }
 
     mSwapchainImages.clear();
@@ -1544,18 +1654,6 @@ void WindowSurfaceVk::destroySwapChainImages(DisplayVk *displayVk)
         {
             swapchainImage.framebufferResolveMS.destroy(device);
         }
-
-        for (ImagePresentHistory &presentHistory : swapchainImage.presentHistory)
-        {
-            ASSERT(presentHistory.semaphore.valid());
-
-            mPresentSemaphoreRecycler.recycle(std::move(presentHistory.semaphore));
-            for (SwapchainCleanupData &oldSwapchain : presentHistory.oldSwapchains)
-            {
-                oldSwapchain.destroy(device, &mPresentSemaphoreRecycler);
-            }
-            presentHistory.oldSwapchains.clear();
-        }
     }
 
     mSwapchainImages.clear();
@@ -1643,25 +1741,12 @@ vk::Framebuffer &WindowSurfaceVk::chooseFramebuffer(const SwapchainResolveMode s
                : mSwapchainImages[mCurrentSwapchainImageIndex].framebuffer;
 }
 
-angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
-                                       const EGLint *rects,
-                                       EGLint n_rects,
-                                       const void *pNextChain,
-                                       bool *presentOutOfDate)
+angle::Result WindowSurfaceVk::prePresentSubmit(ContextVk *contextVk,
+                                                const vk::Semaphore &presentSemaphore,
+                                                QueueSerial *swapSerial)
 {
-    ANGLE_TRACE_EVENT0("gpu.angle", "WindowSurfaceVk::present");
     RendererVk *renderer = contextVk->getRenderer();
 
-    // Throttle the submissions to avoid getting too far ahead of the GPU.
-    QueueSerial *swapSerial = &mSwapHistory.front();
-    mSwapHistory.next();
-
-    if (swapSerial->valid())
-    {
-        ANGLE_TRACE_EVENT0("gpu.angle", "WindowSurfaceVk::present: Throttle CPU");
-        ANGLE_TRY(renderer->finishQueueSerial(contextVk, *swapSerial));
-    }
-
     SwapchainImage &image               = mSwapchainImages[mCurrentSwapchainImageIndex];
     vk::Framebuffer &currentFramebuffer = chooseFramebuffer(SwapchainResolveMode::Disabled);
 
@@ -1720,47 +1805,47 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
                                       vk::ImageLayout::Present, commandBuffer);
     }
 
-    // Knowing that the kSwapHistorySize'th submission ago has finished, we can know that the
-    // (kSwapHistorySize+1)'th present ago of this image is definitely finished and so its wait
-    // semaphore can be reused.  See doc/PresentSemaphores.md for details.
-    //
-    // This also means the swapchain(s) scheduled to be deleted at the same time can be deleted.
-    ImagePresentHistory &presentHistory = image.presentHistory.front();
-    image.presentHistory.next();
-
-    vk::Semaphore *presentSemaphore = &presentHistory.semaphore;
-    ASSERT(presentSemaphore->valid());
-
-    for (SwapchainCleanupData &oldSwapchain : presentHistory.oldSwapchains)
-    {
-        oldSwapchain.destroy(contextVk->getDevice(), &mPresentSemaphoreRecycler);
-    }
-    presentHistory.oldSwapchains.clear();
-
-    // Schedule pending old swapchains to be destroyed at the same time the semaphore for this
-    // present can be destroyed.
-    presentHistory.oldSwapchains = std::move(mOldSwapchains);
-
     // The overlay is drawn after this.  This ensures that drawing the overlay does not interfere
     // with other functionality, especially counters used to validate said functionality.
     const bool shouldDrawOverlay = overlayHasEnabledWidget(contextVk);
 
-    ANGLE_TRY(contextVk->flushAndGetSerial(shouldDrawOverlay ? nullptr : presentSemaphore,
+    ANGLE_TRY(contextVk->flushAndGetSerial(shouldDrawOverlay ? nullptr : &presentSemaphore,
                                            swapSerial, RenderPassClosureReason::EGLSwapBuffers));
 
     if (shouldDrawOverlay)
     {
         updateOverlay(contextVk);
         ANGLE_TRY(drawOverlay(contextVk, &image));
-        ANGLE_TRY(contextVk->flushAndGetSerial(presentSemaphore, swapSerial,
+        ANGLE_TRY(contextVk->flushAndGetSerial(&presentSemaphore, swapSerial,
                                                RenderPassClosureReason::AlreadySpecifiedElsewhere));
     }
 
+    return angle::Result::Continue;
+}
+
+angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
+                                       const EGLint *rects,
+                                       EGLint n_rects,
+                                       const void *pNextChain,
+                                       bool *presentOutOfDate)
+{
+    ANGLE_TRACE_EVENT0("gpu.angle", "WindowSurfaceVk::present");
+    RendererVk *renderer = contextVk->getRenderer();
+
+    // Get a new semaphore to use for present.
+    vk::Semaphore presentSemaphore;
+    ANGLE_TRY(NewSemaphore(contextVk, &mPresentSemaphoreRecycler, &presentSemaphore));
+
+    // Make a submission before present to flush whatever's pending.  At the very least, a
+    // submission is necessary to make sure the present semaphore is signaled.
+    QueueSerial swapSerial;
+    ANGLE_TRY(prePresentSubmit(contextVk, presentSemaphore, &swapSerial));
+
     VkPresentInfoKHR presentInfo   = {};
     presentInfo.sType              = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
     presentInfo.pNext              = pNextChain;
     presentInfo.waitSemaphoreCount = 1;
-    presentInfo.pWaitSemaphores    = presentSemaphore->ptr();
+    presentInfo.pWaitSemaphores    = presentSemaphore.ptr();
     presentInfo.swapchainCount     = 1;
     presentInfo.pSwapchains        = &mSwapchain;
     presentInfo.pImageIndices      = &mCurrentSwapchainImageIndex;
@@ -1800,13 +1885,102 @@ angle::Result WindowSurfaceVk::present(ContextVk *contextVk,
     // Set FrameNumber for the presented image.
     mSwapchainImages[mCurrentSwapchainImageIndex].mFrameNumber = mFrameCount++;
 
+    // Place the semaphore in the present history.  Schedule pending old swapchains to be destroyed
+    // at the same time the semaphore for this present can be destroyed.
+    mPresentHistory.emplace_back();
+    mPresentHistory.back().semaphore     = std::move(presentSemaphore);
+    mPresentHistory.back().oldSwapchains = std::move(mOldSwapchains);
+    mPresentHistory.back().imageIndex    = mCurrentSwapchainImageIndex;
+
+    // Clean up whatever present is already finished.
+    ANGLE_TRY(cleanUpPresentHistory(contextVk));
+
     ANGLE_TRY(computePresentOutOfDate(contextVk, result, presentOutOfDate));
 
+    ANGLE_TRY(throttleCPU(contextVk, swapSerial));
+
     contextVk->resetPerFramePerfCounters();
 
     return angle::Result::Continue;
 }
 
+angle::Result WindowSurfaceVk::throttleCPU(ContextVk *contextVk,
+                                           const QueueSerial &currentSubmitSerial)
+{
+    RendererVk *renderer = contextVk->getRenderer();
+
+    // Take the oldest serial from the swap history and store the current submission's serial in
+    // its slot before advancing the circular buffer.  The oldest serial is waited on below.
+    QueueSerial swapSerial = mSwapHistory.front();
+    mSwapHistory.front()   = currentSubmitSerial;
+    mSwapHistory.next();
+
+    if (swapSerial.valid())
+    {
+        ANGLE_TRACE_EVENT0("gpu.angle", "WindowSurfaceVk::throttleCPU");
+        ANGLE_TRY(renderer->finishQueueSerial(contextVk, swapSerial));
+    }
+
+    return angle::Result::Continue;
+}
+
+angle::Result WindowSurfaceVk::cleanUpPresentHistory(vk::Context *context)
+{
+    const VkDevice device = context->getDevice();
+
+    while (!mPresentHistory.empty())
+    {
+        impl::ImagePresentOperation &presentOperation = mPresentHistory.front();
+
+        // If there is no fence associated with this present operation, it can't be cleaned up yet.
+        if (!presentOperation.fence.valid())
+        {
+            // A present operation to an old swapchain can't be without a fence.
+            ASSERT(presentOperation.imageIndex != kInvalidImageIndex);
+            break;
+        }
+
+        // Otherwise check to see if the fence is signaled.
+        VkResult result = presentOperation.fence.getStatus(device);
+        if (result == VK_NOT_READY)
+        {
+            // Not signaled yet; stop here and leave the remaining entries for a later call.
+            break;
+        }
+
+        ANGLE_VK_TRY(context, result);
+
+        presentOperation.destroy(device, &mPresentFenceRecycler, &mPresentSemaphoreRecycler);
+        mPresentHistory.pop_front();
+    }
+
+    // The present history can grow indefinitely if a present operation is done on an index that's
+    // never acquired in the future.  In that case, there's no fence associated with that present
+    // operation.  Move the offending entry to last, so the resources associated with the rest of
+    // the present operations can be duly freed.
+    if (mPresentHistory.size() > mSwapchainImages.size() * 2 &&
+        !mPresentHistory.front().fence.valid())
+    {
+        impl::ImagePresentOperation presentOperation = std::move(mPresentHistory.front());
+        mPresentHistory.pop_front();
+
+        // We can't be stuck on a presentation to an old swapchain without a fence.
+        ASSERT(presentOperation.imageIndex != kInvalidImageIndex);
+
+        // Move clean up data to the next (now first) present operation.  The size check above
+        // guarantees one exists as long as there is at least one swapchain image.  None of the
+        // remaining operations can carry clean up data; the first present gathers all of it.
+        ASSERT(!HasAnyOldSwapchains(mPresentHistory));
+        mPresentHistory.front().oldSwapchains = std::move(presentOperation.oldSwapchains);
+
+        // Put the present operation at the end of the queue so it's revisited after the rest of the
+        // present operations are cleaned up.
+        mPresentHistory.push_back(std::move(presentOperation));
+    }
+
+    return angle::Result::Continue;
+}
+
 angle::Result WindowSurfaceVk::swapImpl(const gl::Context *context,
                                         const EGLint *rects,
                                         EGLint n_rects,
@@ -1959,16 +2133,33 @@ VkResult WindowSurfaceVk::acquireNextSwapchainImage(vk::Context *context)
 
     const vk::Semaphore *acquireImageSemaphore = &mAcquireImageSemaphores.front();
 
-    VkResult result =
+    // Associate a fence with this acquire.  It will be used to know when to recycle the semaphore
+    // used to previously present the returned image.  Note that while this fence is provided to
+    // vkAcquireNextImageKHR, it's actually about the present operation.  There is currently no way
+    // to associate the fence with the present operation itself, so this is a hack.
+    vk::Fence presentFence;
+    VkResult result = NewFence(context, &mPresentFenceRecycler, &presentFence);
+    if (result != VK_SUCCESS)
+    {
+        return result;
+    }
+
+    result =
         vkAcquireNextImageKHR(device, mSwapchain, UINT64_MAX, acquireImageSemaphore->getHandle(),
-                              VK_NULL_HANDLE, &mCurrentSwapchainImageIndex);
+                              presentFence.getHandle(), &mCurrentSwapchainImageIndex);
 
     // VK_SUBOPTIMAL_KHR is ok since we still have an Image that can be presented successfully
     if (ANGLE_UNLIKELY(result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR))
     {
+        // On failure, the fence is going to be untouched, so it can be recycled right away.
+        mPresentFenceRecycler.recycle(std::move(presentFence));
         return result;
     }
 
+    // Associate the present fence with the last present operation.
+    AssociateFenceWithPresentHistory(mCurrentSwapchainImageIndex, std::move(presentFence),
+                                     &mPresentHistory);
+
     SwapchainImage &image = mSwapchainImages[mCurrentSwapchainImageIndex];
 
     // Single Image Mode
diff --git a/src/libANGLE/renderer/vulkan/SurfaceVk.h b/src/libANGLE/renderer/vulkan/SurfaceVk.h
index be453b726b..dfb4b46c9a 100644
--- a/src/libANGLE/renderer/vulkan/SurfaceVk.h
+++ b/src/libANGLE/renderer/vulkan/SurfaceVk.h
@@ -129,8 +129,8 @@ namespace impl
 {
 static constexpr size_t kSwapHistorySize = 2;
 
-// Old swapchain and associated present semaphores that need to be scheduled for destruction when
-// appropriate.
+// Old swapchain and associated present semaphores that need to be scheduled for
+// recycling/destruction when appropriate.
 struct SwapchainCleanupData : angle::NonCopyable
 {
     SwapchainCleanupData();
@@ -141,27 +141,35 @@ struct SwapchainCleanupData : angle::NonCopyable
 
     // The swapchain to be destroyed.
     VkSwapchainKHR swapchain = VK_NULL_HANDLE;
-    // Any present semaphores that were pending destruction at the time the swapchain was
-    // recreated will be scheduled for destruction at the same time as the swapchain.
+    // Any present semaphores that were pending recycle at the time the swapchain was recreated will
+    // be scheduled for recycling at the same time as the swapchain's destruction.
     std::vector<vk::Semaphore> semaphores;
 };
 
-// A circular buffer per image stores the semaphores used for presenting that image.  Taking the
-// swap history into account, only the oldest semaphore is guaranteed to be no longer in use by the
-// presentation engine.  See doc/PresentSemaphores.md for details.
+// Each present operation is associated with a wait semaphore.  To know when that semaphore can be
+// recycled, a fence is used in the call to vkAcquireNextImageKHR.  When that fence is signaled, the
+// semaphore used in the last present operation involving the returned image can be recycled.  See
+// doc/PresentSemaphores.md for details.
 //
-// Old swapchains are scheduled to be destroyed at the same time as the first semaphore used to
-// present an image of the new swapchain.  This is to ensure that the presentation engine is no
-// longer presenting an image from the old swapchain.
-struct ImagePresentHistory : angle::NonCopyable
+// Old swapchains are scheduled to be destroyed at the same time as the last wait semaphore used to
+// present an image to the old swapchains can be recycled.
+struct ImagePresentOperation : angle::NonCopyable
 {
-    ImagePresentHistory();
-    ImagePresentHistory(ImagePresentHistory &&other);
-    ImagePresentHistory &operator=(ImagePresentHistory &&other);
-    ~ImagePresentHistory();
+    ImagePresentOperation();
+    ImagePresentOperation(ImagePresentOperation &&other);
+    ImagePresentOperation &operator=(ImagePresentOperation &&other);
+    ~ImagePresentOperation();
 
+    void destroy(VkDevice device,
+                 vk::Recycler<vk::Fence> *fenceRecycler,
+                 vk::Recycler<vk::Semaphore> *semaphoreRecycler);
+
+    vk::Fence fence;
     vk::Semaphore semaphore;
     std::vector<SwapchainCleanupData> oldSwapchains;
+
+    // Used to associate an acquire fence with the previous present operation of the image.
+    uint32_t imageIndex;
 };
 
 // Swapchain images and their associated objects.
@@ -177,9 +185,6 @@ struct SwapchainImage : angle::NonCopyable
     vk::Framebuffer fetchFramebuffer;
     vk::Framebuffer framebufferResolveMS;
 
-    // A circular array of semaphores used for presenting this image.
-    static constexpr size_t kPresentHistorySize = kSwapHistorySize + 1;
-    angle::CircularBuffer<ImagePresentHistory, kPresentHistorySize> presentHistory;
     uint64_t mFrameNumber = 0;
 };
 }  // namespace impl
@@ -350,18 +355,27 @@ class WindowSurfaceVk : public SurfaceVk
     angle::Result computePresentOutOfDate(vk::Context *context,
                                           VkResult result,
                                           bool *presentOutOfDate);
+    angle::Result prePresentSubmit(ContextVk *contextVk,
+                                   const vk::Semaphore &presentSemaphore,
+                                   QueueSerial *swapSerial);
     angle::Result present(ContextVk *contextVk,
                           const EGLint *rects,
                           EGLint n_rects,
                           const void *pNextChain,
                           bool *presentOutOfDate);
 
+    angle::Result cleanUpPresentHistory(vk::Context *context);
+
+    // Throttle the CPU such that the application's logic and command buffer recording don't get
+    // more than two frames ahead of the frame being rendered (and three frames ahead of the one
+    // being presented).  This is a failsafe, as the application should ensure command buffer
+    // recording is not ahead of the frame being rendered by *one* frame.
+    angle::Result throttleCPU(ContextVk *contextVk, const QueueSerial &currentSubmitSerial);
+
     void updateOverlay(ContextVk *contextVk) const;
     bool overlayHasEnabledWidget(ContextVk *contextVk) const;
     angle::Result drawOverlay(ContextVk *contextVk, impl::SwapchainImage *image) const;
 
-    angle::Result newPresentSemaphore(vk::Context *context, vk::Semaphore *semaphoreOut);
-
     bool isMultiSampled() const;
 
     bool supportsPresentMode(vk::PresentMode presentMode) const;
@@ -378,7 +392,10 @@ class WindowSurfaceVk : public SurfaceVk
     VkCompositeAlphaFlagBitsKHR mCompositeAlpha;
 
     // A circular buffer that stores the serial of the submission fence of the context on every
-    // swap. The CPU is throttled by waiting for the 2nd previous serial to finish.
+    // swap. The CPU is throttled by waiting for the 2nd previous serial to finish.  This should
+    // normally be a no-op, as the application should pace itself to avoid input lag, and is
+    // implemented in ANGLE as a failsafe.  Removing this throttling requires untangling it from
+    // acquire semaphore recycling (see mAcquireImageSemaphores below).
     angle::CircularBuffer<QueueSerial, impl::kSwapHistorySize> mSwapHistory;
 
     // The previous swapchain which needs to be scheduled for destruction when appropriate.  This
@@ -398,16 +415,18 @@ class WindowSurfaceVk : public SurfaceVk
     // number of semaphores that are used to acquire swapchain images, and that is
     // kSwapHistorySize+1:
     //
-    //                    Unrelated submission in      Submission as part of
-    //                      the middle of frame            buffer swap
-    //                               |                          |
-    //                               V                          V
-    //     Frame i:     ... ANI ... QS (fence Fa) ... Wait(..) QS (Fence Fb) QP
-    //     Frame i+1:   ... ANI ... QS (fence Fc) ... Wait(..) QS (Fence Fd) QP
-    //     Frame i+2:   ... ANI ... QS (fence Fe) ... Wait(Fb) QS (Fence Ff) QP
-    //                                                 ^
-    //                                                 |
-    //                                          CPU throttling
+    //             Unrelated submission in     Submission as part of
+    //               the middle of frame          buffer swap
+    //                              |                 |
+    //                              V                 V
+    //     Frame i:     ... ANI ... QS (fence Fa) ... QS (Fence Fb) QP Wait(..)
+    //     Frame i+1:   ... ANI ... QS (fence Fc) ... QS (Fence Fd) QP Wait(..) <--\
+    //     Frame i+2:   ... ANI ... QS (fence Fe) ... QS (Fence Ff) QP Wait(Fb)    |
+    //                                                                  ^          |
+    //                                                                  |          |
+    //                                                           CPU throttling    |
+    //                                                                             |
+    //                               Note: app should throttle itself here (equivalent of Wait(Fb))
     //
     // In frame i+2 (2 is kSwapHistorySize), ANGLE waits on fence Fb which means that the semaphore
     // used for Frame i's ANI can be reused (because Fb-is-signalled implies Fa-is-signalled).
@@ -423,6 +442,11 @@ class WindowSurfaceVk : public SurfaceVk
     // During window resizing when swapchains are recreated every frame, the number of in-flight
     // present semaphores can grow indefinitely.  See doc/PresentSemaphores.md.
     vk::Recycler<vk::Semaphore> mPresentSemaphoreRecycler;
+    // Fences are associated with present semaphores to know when they can be recycled.
+    vk::Recycler<vk::Fence> mPresentFenceRecycler;
+
+    // The presentation history, used to recycle semaphores and destroy old swapchains.
+    std::deque<impl::ImagePresentOperation> mPresentHistory;
 
     // Depth/stencil image.  Possibly multisampled.
     vk::ImageHelper mDepthStencilImage;
diff --git a/src/libANGLE/renderer/vulkan/doc/PresentSemaphores.md b/src/libANGLE/renderer/vulkan/doc/PresentSemaphores.md
index c3ff7a535e..698a9a8e17 100644
--- a/src/libANGLE/renderer/vulkan/doc/PresentSemaphores.md
+++ b/src/libANGLE/renderer/vulkan/doc/PresentSemaphores.md
@@ -24,7 +24,7 @@ following:
 
     CPU: ANI  ... QS   ... QP
          S:S1     W:S1     W:S2
-                  S:S2
+         S:F1     S:S2
     GPU:          <------------ R ----------->
      PE:                                      <-------- P ------>
 
@@ -32,135 +32,60 @@ That is, the GPU starts rendering after submission, and the presentation is star
 finished.  Note that Vulkan tries to abstract a large variety of PE architectures, some of which do
 not behave in a straight-forward manner.  As such, ANGLE cannot know what the PE is exactly doing
 with the images or when the images are visible on the screen.  The only signal out of the PE is
-received through the semaphore that's used in ANI.
-
-With multiple frames, the pipeline looks different based on present mode.  Let's focus on
-FIFO (the arguments in this document translate to all modes) with 3 images:
-
-    CPU: QS QP QS QP QS QP QS QP
-         I1 I1 I2 I2 I3 I3 I1 I1
-    GPU: <---- R I1 ----><---- R I2 ----><---- R I3 ----><---- R I1 ---->
-     PE:                 <----- P I1 -----><----- P I2 -----><----- P I3 -----><----- P I1 ----->
-
-First, an issue is evident here.  The CPU is submitting jobs and queuing images for presentation
-faster than the GPU can render them or the PE can view them.  This can cause the length of the
-submit queue to grow indefinitely, resulting in larger and larger input lag.  In FIFO mode, the PE
-present queue also grows indefinitely.
-
-To address this issue, ANGLE paces the CPU such that the length of the submit queue is kept at a
-maximum of 1 image (i.e. submission with one image is being processed, and another one is in queue):
-
-    CPU: QS   QS          W:F1 QS         W:F2 QS
-         I1   I2               I3              I1
-         S:F1 S:F2             S:F3            S:F4
-    GPU: <---- R I1 ----><---- R I2 ----><---- R I3 ----><---- R I1 ---->
-
-> Note: Ideally, the length of the PE present queue should also be kept at a maximum of 1 (i.e. one
-> image being presented, and another in queue).  However, the Vulkan WSI extension doesn't provide
-> enough control to achieve this.  In heavy application, the length of the PE present queue is
-> probably 1 anyway (as the rendering time is almost as long as the frame (i.e. present time), in
-> which case pacing the submissions similarly paces the presentation).  In theory, in FIFO mode, the
-> length of the PE present queue is below n+2 where n is the number of swapchain images.
->
-> To understand why, imagine a FIFO swapchain with 1000 images and submissions that are
-> infinitesimally short.  In this case, the CPU pacing is effectively a no-op (as the GPU instantly
-> finishes jobs) for the first 1002 submissions.  The 1003rd submission waits for F1001 (which uses
-> I1).  However, the 1001st submission will not start until the PE switches to presenting I2 (at the
-> next V-Sync).  The CPU then waits for V-Sync before the 1003rd submission.  The CPU waits for one
-> V-Sync for every subsequent submission, keeping the length of the queue 1002.
-> [`VK_GOOGLE_display_timing`][DisplayTimingGOOGLE] is likely a solution to this problem.
-
-Associated with each QP operation is a semaphore signaled by the preceding QS and waited on by the
-PE before the image can be presented.  Currently, there's no feedback from Vulkan (See [internal
-Khronos issue][VulkanIssue1060]) regarding _when_ the PE has actually finished waiting on the
-semaphore!  This means that the application cannot generally know when to destroy the corresponding
-semaphore.  However, taking ANGLE's CPU pacing into account, we are able to destroy (or rather
-reuse) semaphores when they are provably unused.
-
-This document describes an approach for destroying semaphores that should work with all valid PE
-architectures, but will be described in terms of more common PE architectures (e.g. where the PE
-only backs each VkImage and VkSemaphore handle with one actual memory object, and where the PE
-cycles between the swapchain images in a straight-forward manner).
-
-The interested reader may follow the discussion in this abandoned [gerrit CL][CL1757018] for more
-background and ideas.
-
-[DisplayTimingGOOGLE]: https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VK_GOOGLE_display_timing.html
-[VulkanIssue1060]: https://gitlab.khronos.org/vulkan/vulkan/issues/1060
-[CL1757018]: https://chromium-review.googlesource.com/c/angle/angle/+/1757018
+received through the semaphore and fence that are used in ANI.
 
-## Determining When a QP Semaphore is Waited On
-
-Let's combine the above diagrams with all the details:
+The issue is that, in the above diagram, it's unclear when S2 can be destroyed or recycled.  S2
+becomes unused once rendering (R) is finished and before present (P) starts, but that moment is
+not directly observable.  As a result, it has to be inferred from a future operation.
 
-    CPU: ANI   | QS    | QP    | ANI   | QS    | QP    | ANI   | W:F1 | QS    | QP    | ANI   | W:F2 | QS    | QP
-         I1    | I1    | I1    | I2    | I2    | I2    | I3    |      | I3    | I3    | I1    |      | I1    | I1
-         S:SA1 | W:SA1 |       | S:SA2 | W:SA2 |       | S:SA3 |      | W:SA3 |       | S:SA4 |      | W:SA4 |
-               | S:SP1 | W:SP1 |       | S:SP2 | W:SP2 |       |      | S:SP3 | W:SP3 |       |      | S:SP4 | W:SP4
-               | S:F1  |       |       | S:F2  |       |       |      | S:F3  |       |       |      | S:F4  |
+## Determining When a QP Semaphore is Waited On
 
-Let's focus only on sequences that return the same image:
+The ANI call takes a fence that is signaled once the image is acquired.  When that happens, it can
+be inferred that the previous presentation of the image is done, which in turn implies that its
+associated wait semaphore is no longer in use.
 
-    CPU: ANI   | W:F(X-2) | QS    | QP    | ... | ANI   | W:F(Y-2) | QS    | QP
-         I1    |          | I1    | I1    |     | I1    |          | I1    | I1
-         S:SAX |          | W:SAX |       |     | S:SAY |          | W:SAY |
-               |          | S:SPX | W:SPX |     |       |          | S:SPY | W:SPY
-               |          | S:FX  |       |     |       |          | S:FY  |
+Assuming both ANI calls below return the same index:
 
-Note that X and Y are arbitrarily distanced (including possibly being sequential).
+    CPU: ANI  ... QS   ... QP         ANI  ... QS   ... QP
+         S:S1     W:S1     W:S2       S:S3     W:S3     W:S4
+         S:F1     S:S2                S:F2     S:S4
+    GPU:          <------ R ------>            <------ R ------>
+     PE:                           <-- P -->                    <-- P -->
 
-Say we are at frame Y+2.  There's therefore a wait on FY.  The following holds:
+The following holds:
 
-    FY is signaled
-    => SAY is signaled
-    => The PE has handed I1 back to the application
-    => The PE has already processed the *previous* QP of I1
-    => SPX is waited on
+    F2 is signaled
+    => The PE has handed the image to the application
+    => The PE is no longer presenting the image (the first P operation is finished)
+    => The PE is done waiting on S2
 
-At this point, we can destroy SPX.  In other words, in frame Y+2, we can destroy SPX (note that 2 is
-the number of frames the CPU pacing code uses).  If frame Y+1 is not using I1, this means the
-history of present semaphores for I1 would be `{SPX, SPY}` and we can destroy the oldest semaphore
-in this list.  If frame Y+1 is also using I1, we should still destroy SPX in frame Y+2, but the
-history of the present semaphores for I1 would be `{SPX, SPY, SP(Y+1)}`.
+At this point, we can destroy or recycle S2.  To implement this, a history of present operations is
+maintained, where each entry includes the wait semaphore used with that presentation.  Associated
+with each present operation is a fence used to determine when that semaphore can be destroyed.
 
-In the Vulkan backend, we simplify destruction of semaphores by always keeping a history of 3
-present semaphores for each image (again, 3 is H+1 where H is the swap history size used in CPU
-pacing) and always reuse (instead of destroy) the oldest semaphore of the image that is about to be
-presented.
+Since the fence is not actually known at present time (QP), the present operation is kept in history
+without an associated fence.  Once ANI returns the same index, the fence given to ANI is associated
+with the previous QP of that index.
 
-To summarize, we use the completion of a submission using an image to prove when the semaphore used
-for the *previous* presentation of that image is no longer in use (and can be safely destroyed or
-reused).
+After each present call, the present history is inspected.  Any present operation whose fence is
+signaled is cleaned up.
 
 ## Swapchain recreation
 
 When recreating the swapchain, all images are eventually freed and new ones are created, possibly
-with a different count and present mode.  For the old swapchain, we can no longer rely on the
-completion of a future submission to know when a previous presentation's semaphore can be destroyed,
-as there won't be any more submissions using images from the old swapchain.
-
-> For example, imagine the old swapchain was created in FIFO mode, and one image is being presented
-> until the next V-Sync.  Furthermore, imagine the new swapchain is created in MAILBOX mode.  Since
-> the old swapchain's image will remain presented until V-Sync, the new MAILBOX swapchain can
-> perform an arbitrarily large number of (throw-away) presentations.  The old swapchain (and its
-> associated present semaphores) cannot be destroyed until V-Sync; a signal that's not captured by
-> Vulkan.
+with a different count and present mode.  For the old swapchain, we can no longer rely on a future
+ANI to know when a previous presentation's semaphore can be destroyed, as there won't be any more
+acquisitions from the old swapchain.  Similarly, we cannot know when the old swapchain itself can be
+destroyed.
 
 ANGLE resolves this issue by deferring the destruction of the old swapchain and its remaining
 present semaphores to the time when the semaphore corresponding to the first present of the new
-swapchain can be destroyed.  In the example in the previous section, if SPX is the present semaphore
-of the first QP performed on the new swapchain, at frame Y+2, when we know SPX can be destroyed, we
-know that the first image of the new swapchain has already been presented.  This proves that all
-previous QPs of the old swapchain have been processed.
-
-> Note: the swapchain can potentially be destroyed much earlier, but with no feedback from the
-> presentation engine, we cannot know that.  This delays means that the swapchain could be recreated
-> while there are pending old swapchains to be destroyed.  The destruction of both old swapchains
-> must now be deferred to when the first QP of the new swapchain has been processed.  If an
-> application resizes the window constantly and at a high rate, ANGLE would keep accumulating old
-> swapchains and not free them until it stops.  While a user will likely not be able to do this (as
-> the rate of window system events is lower than the framerate), this can be programmatically done
-> (as indeed done in EGL dEQP tests).  Nvidia for example fails creation of a new swapchain if there
-> are already 20 allocated (on desktop, or less than ten on Quadro).  If the backlog of old
-> swapchains get larger than a threshold, ANGLE calls `vkQueueWaitIdle()` and destroys the
-> swapchains.
+swapchain can be destroyed.  This is safe because once the first present semaphore of the new
+swapchain can be destroyed, the first present operation of the new swapchain is done, which means
+the old swapchain is no longer being presented.
+
+Note that the swapchain may be recreated without a second acquire.  This means that the swapchain
+could be recreated while there are pending old swapchains to be destroyed.  The destruction of both
+old swapchains must now be deferred to when the first QP of the new swapchain has been processed.
+If an application resizes the window constantly and at a high rate, ANGLE would keep accumulating
+old swapchains and not free them until the resizing stops.
diff --git a/src/libGLESv2/entry_points_gles_ext_autogen.cpp b/src/libGLESv2/entry_points_gles_ext_autogen.cpp
index e993c714e8..e0114d0d2a 100644
--- a/src/libGLESv2/entry_points_gles_ext_autogen.cpp
+++ b/src/libGLESv2/entry_points_gles_ext_autogen.cpp
@@ -4596,6 +4596,7 @@ void GL_APIENTRY GL_EGLImageTargetTexStorageEXT(GLenum target,
                                                 GLeglImageOES image,
                                                 const GLint *attrib_list)
 {
+    ANGLE_SCOPED_GLOBAL_LOCK();
     Context *context = GetValidGlobalContext();
     EVENT(context, GLEGLImageTargetTexStorageEXT,
           "context = %d, target = %s, image = 0x%016" PRIxPTR ", attrib_list = 0x%016" PRIxPTR "",
@@ -4629,6 +4630,7 @@ void GL_APIENTRY GL_EGLImageTargetTextureStorageEXT(GLuint texture,
                                                     GLeglImageOES image,
                                                     const GLint *attrib_list)
 {
+    ANGLE_SCOPED_GLOBAL_LOCK();
     Context *context = GetValidGlobalContext();
     EVENT(context, GLEGLImageTargetTextureStorageEXT,
           "context = %d, texture = %u, image = 0x%016" PRIxPTR ", attrib_list = 0x%016" PRIxPTR "",
@@ -9919,6 +9921,7 @@ void GL_APIENTRY GL_BlitFramebufferNV(GLint srcX0,
 // GL_OES_EGL_image
 void GL_APIENTRY GL_EGLImageTargetRenderbufferStorageOES(GLenum target, GLeglImageOES image)
 {
+    ANGLE_SCOPED_GLOBAL_LOCK();
     Context *context = GetValidGlobalContext();
     EVENT(context, GLEGLImageTargetRenderbufferStorageOES,
           "context = %d, target = %s, image = 0x%016" PRIxPTR "", CID(context),
@@ -9950,6 +9953,7 @@ void GL_APIENTRY GL_EGLImageTargetRenderbufferStorageOES(GLenum target, GLeglIma
 
 void GL_APIENTRY GL_EGLImageTargetTexture2DOES(GLenum target, GLeglImageOES image)
 {
+    ANGLE_SCOPED_GLOBAL_LOCK();
     Context *context = GetValidGlobalContext();
     EVENT(context, GLEGLImageTargetTexture2DOES,
           "context = %d, target = %s, image = 0x%016" PRIxPTR "", CID(context),
diff --git a/src/tests/gl_tests/VulkanPerformanceCounterTest.cpp b/src/tests/gl_tests/VulkanPerformanceCounterTest.cpp
index 2767193173..5341b15feb 100644
--- a/src/tests/gl_tests/VulkanPerformanceCounterTest.cpp
+++ b/src/tests/gl_tests/VulkanPerformanceCounterTest.cpp
@@ -649,6 +649,7 @@ TEST_P(VulkanPerformanceCounterTest, SubmittingOutsideCommandBufferDoesNotCollec
     ASSERT_EQ(posLoc, glGetAttribLocation(program2, essl1_shaders::PositionAttrib()));
 
     // Issue uploads until there's an implicit submission
+    size_t textureCount = 0;
     while (getPerfCounters().vkQueueSubmitCallsTotal == submitCommandsCount)
     {
         GLTexture newTexture;
@@ -660,7 +661,10 @@ TEST_P(VulkanPerformanceCounterTest, SubmittingOutsideCommandBufferDoesNotCollec
 
         glDrawArrays(GL_TRIANGLES, 0, 6);
         ASSERT_GL_NO_ERROR();
+        textureCount++;
     }
+    // 256x256 texture upload should not trigger a submission
+    ASSERT(textureCount > 1);
 
     ++submitCommandsCount;
     EXPECT_EQ(getPerfCounters().vkQueueSubmitCallsTotal, submitCommandsCount);
@@ -679,13 +683,6 @@ TEST_P(VulkanPerformanceCounterTest, SubmittingOutsideCommandBufferDoesNotCollec
     EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
     ++submitCommandsCount;
 
-    // When the preferSubmitAtFBOBoundary feature is enabled, the render pass closure causes an
-    // extra submission.
-    if (hasPreferSubmitAtFBOBoundary())
-    {
-        ++submitCommandsCount;
-    }
-
     // Verify counters.
     EXPECT_EQ(getPerfCounters().renderPasses, expectedRenderPassCount);
     EXPECT_EQ(getPerfCounters().vkQueueSubmitCallsTotal, submitCommandsCount);
author	android-autoroll <android-autoroll@skia-public.iam.gserviceaccount.com>	2022-12-01 01:21:06 +0000
committer	Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>	2022-12-01 01:21:06 +0000
commit	1d9f62859e5709bee647813285cf42dd253396d6 (patch)
tree	64414618b227dd73fb7e060051dd4ab89e2e7f8b
parent	6eeb92d98c8d1db3000b1b9fbfc5663853bfe80c (diff)
parent	9cdc28b0fc595bd00cc5ca9133c82c065f504611 (diff)
download	angle-main-16k.tar.gz