Flutter Impeller
gpu_tracer_vk.cc
Go to the documentation of this file.
1 // Copyright 2013 The Flutter Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
6 
7 #include <memory>
8 #include <optional>
9 #include <thread>
10 #include <utility>
11 #include "fml/logging.h"
12 #include "fml/trace_event.h"
15 #include "vulkan/vulkan.hpp"
16 
17 namespace impeller {
18 
19 static constexpr uint32_t kPoolSize = 1024u;
20 
21 GPUTracerVK::GPUTracerVK(const std::shared_ptr<DeviceHolder>& device_holder)
22  : device_holder_(device_holder) {
23  timestamp_period_ = device_holder_->GetPhysicalDevice()
24  .getProperties()
25  .limits.timestampPeriod;
26  if (timestamp_period_ <= 0) {
27  // The device does not support timestamp queries.
28  return;
29  }
30  // Disable tracing in release mode.
31 #ifdef IMPELLER_DEBUG
32  enabled_ = true;
33 #endif
34 }
35 
36 bool GPUTracerVK::IsEnabled() const {
37  return enabled_;
38 }
39 
41  FML_DCHECK(!in_frame_);
42  in_frame_ = true;
43  raster_thread_id_ = std::this_thread::get_id();
44 }
45 
47  if (!enabled_) {
48  return;
49  }
50 
51  Lock lock(trace_state_mutex_);
52  current_state_ = (current_state_ + 1) % kTraceStatesSize;
53 
54  auto& state = trace_states_[current_state_];
55  // If there are still pending buffers on the trace state we're switching to,
56  // that means that a cmd buffer we were relying on to signal this likely
57  // never finished. This shouldn't happen unless there is a bug in the
58  // encoder logic. We set it to zero anyway to prevent a validation error
59  // from becoming a memory leak.
60  FML_DCHECK(state.pending_buffers == 0u);
61 
62  state.pending_buffers = 0;
63  state.current_index = 0;
64  in_frame_ = false;
65 }
66 
67 std::unique_ptr<GPUProbe> GPUTracerVK::CreateGPUProbe() {
68  return std::make_unique<GPUProbe>(weak_from_this());
69 }
70 
71 void GPUTracerVK::RecordCmdBufferStart(const vk::CommandBuffer& buffer,
72  GPUProbe& probe) {
73  if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
74  !in_frame_) {
75  return;
76  }
77  Lock lock(trace_state_mutex_);
78  auto& state = trace_states_[current_state_];
79 
80  // Initialize the query pool for the first query on each frame.
81  if (state.pending_buffers == 0) {
82  vk::QueryPoolCreateInfo info;
83  info.queryCount = kPoolSize;
84  info.queryType = vk::QueryType::eTimestamp;
85 
86  auto [status, pool] =
87  device_holder_->GetDevice().createQueryPoolUnique(info);
88  if (status != vk::Result::eSuccess) {
89  VALIDATION_LOG << "Failed to create query pool.";
90  return;
91  }
92  trace_states_[current_state_].query_pool = std::move(pool);
93  buffer.resetQueryPool(trace_states_[current_state_].query_pool.get(), 0,
94  kPoolSize);
95  }
96 
97  // We size the query pool to kPoolSize, but Flutter applications can create an
98  // unbounded amount of work per frame. If we encounter this, stop recording
99  // cmds.
100  if (state.current_index >= kPoolSize) {
101  return;
102  }
103 
104  buffer.writeTimestamp(vk::PipelineStageFlagBits::eTopOfPipe,
105  trace_states_[current_state_].query_pool.get(),
106  state.current_index);
107  state.current_index += 1;
108  if (!probe.index_.has_value()) {
109  state.pending_buffers += 1;
110  probe.index_ = current_state_;
111  }
112 }
113 
114 void GPUTracerVK::RecordCmdBufferEnd(const vk::CommandBuffer& buffer,
115  GPUProbe& probe) {
116  if (!enabled_ || std::this_thread::get_id() != raster_thread_id_ ||
117  !in_frame_) {
118  return;
119  }
120  Lock lock(trace_state_mutex_);
121  GPUTraceState& state = trace_states_[current_state_];
122 
123  if (state.current_index >= kPoolSize) {
124  return;
125  }
126 
127  buffer.writeTimestamp(vk::PipelineStageFlagBits::eBottomOfPipe,
128  state.query_pool.get(), state.current_index);
129 
130  state.current_index += 1;
131  if (!probe.index_.has_value()) {
132  state.pending_buffers += 1;
133  probe.index_ = current_state_;
134  }
135 }
136 
137 void GPUTracerVK::OnFenceComplete(size_t frame_index) {
138  if (!enabled_) {
139  return;
140  }
141  Lock lock(trace_state_mutex_);
142  GPUTraceState& state = trace_states_[frame_index];
143 
144  FML_DCHECK(state.pending_buffers > 0);
145  state.pending_buffers -= 1;
146 
147  if (state.pending_buffers == 0) {
148  auto buffer_count = state.current_index;
149  std::vector<uint64_t> bits(buffer_count);
150 
151  auto result = device_holder_->GetDevice().getQueryPoolResults(
152  state.query_pool.get(), 0, state.current_index,
153  buffer_count * sizeof(uint64_t), bits.data(), sizeof(uint64_t),
154  vk::QueryResultFlagBits::e64);
155  // This may return VK_NOT_READY if the query couldn't be completed, or if
156  // there are queries still pending. From local testing, this happens
157  // occassionally on very expensive frames. Its unclear if we can do anything
158  // about this, because by design this should only signal after all cmd
159  // buffers have signaled. Adding VK_QUERY_RESULT_WAIT_BIT to the flags
160  // passed to getQueryPoolResults seems like it would fix this, but actually
161  // seems to result in more stuck query errors. Better to just drop them and
162  // move on.
163  if (result != vk::Result::eSuccess) {
164  return;
165  }
166 
167  uint64_t smallest_timestamp = std::numeric_limits<uint64_t>::max();
168  uint64_t largest_timestamp = 0;
169  for (auto i = 0u; i < bits.size(); i++) {
170  smallest_timestamp = std::min(smallest_timestamp, bits[i]);
171  largest_timestamp = std::max(largest_timestamp, bits[i]);
172  }
173  auto gpu_ms =
174  (((largest_timestamp - smallest_timestamp) * timestamp_period_) /
175  1000000);
176  FML_TRACE_COUNTER("flutter", "GPUTracer",
177  reinterpret_cast<int64_t>(this), // Trace Counter ID
178  "FrameTimeMS", gpu_ms);
179  }
180 }
181 
182 GPUProbe::GPUProbe(const std::weak_ptr<GPUTracerVK>& tracer)
183  : tracer_(tracer) {}
184 
186  if (!index_.has_value()) {
187  return;
188  }
189  auto tracer = tracer_.lock();
190  if (!tracer) {
191  return;
192  }
193  tracer->OnFenceComplete(index_.value());
194 }
195 
196 void GPUProbe::RecordCmdBufferStart(const vk::CommandBuffer& buffer) {
197  auto tracer = tracer_.lock();
198  if (!tracer) {
199  return;
200  }
201  tracer->RecordCmdBufferStart(buffer, *this);
202 }
203 
204 void GPUProbe::RecordCmdBufferEnd(const vk::CommandBuffer& buffer) {
205  auto tracer = tracer_.lock();
206  if (!tracer) {
207  return;
208  }
209  tracer->RecordCmdBufferEnd(buffer, *this);
210 }
211 
212 } // namespace impeller
gpu_tracer_vk.h
impeller::GPUProbe::~GPUProbe
~GPUProbe()
Definition: gpu_tracer_vk.cc:185
impeller::GPUTracerVK::MarkFrameStart
void MarkFrameStart()
Signal the start of a frame workload.
Definition: gpu_tracer_vk.cc:40
impeller::GPUProbe::GPUProbe
GPUProbe(const std::weak_ptr< GPUTracerVK > &tracer)
Definition: gpu_tracer_vk.cc:182
impeller::GPUTracerVK::GPUTracerVK
GPUTracerVK(const std::shared_ptr< DeviceHolder > &device_holder)
Definition: gpu_tracer_vk.cc:21
impeller::Lock
Definition: thread.h:75
impeller::GPUTracerVK::MarkFrameEnd
void MarkFrameEnd()
Signal the end of a frame workload.
Definition: gpu_tracer_vk.cc:46
validation.h
impeller::GPUProbe
Definition: gpu_tracer_vk.h:89
VALIDATION_LOG
#define VALIDATION_LOG
Definition: validation.h:67
impeller::GPUProbe::RecordCmdBufferStart
void RecordCmdBufferStart(const vk::CommandBuffer &buffer)
Record a timestamp query into the provided cmd buffer to record start time.
Definition: gpu_tracer_vk.cc:196
context_vk.h
impeller::kPoolSize
static constexpr uint32_t kPoolSize
Definition: gpu_tracer_vk.cc:19
impeller::GPUTracerVK::CreateGPUProbe
std::unique_ptr< GPUProbe > CreateGPUProbe()
Create a GPUProbe to trace the execution of a command buffer on the GPU.
Definition: gpu_tracer_vk.cc:67
impeller::GPUTracerVK::IsEnabled
bool IsEnabled() const
Definition: gpu_tracer_vk.cc:36
impeller::GPUProbe::RecordCmdBufferEnd
void RecordCmdBufferEnd(const vk::CommandBuffer &buffer)
Record a timestamp query into the provided cmd buffer to record end time.
Definition: gpu_tracer_vk.cc:204
impeller
Definition: aiks_context.cc:10