5 #include "flutter/fml/synchronization/waitable_event.h"
6 #include "flutter/fml/time/time_point.h"
7 #include "flutter/testing/testing.h"
8 #include "gmock/gmock.h"
11 #include "impeller/fixtures/sample.comp.h"
12 #include "impeller/fixtures/stage1.comp.h"
13 #include "impeller/fixtures/stage2.comp.h"
21 #include "impeller/renderer/prefix_sum_test.comp.h"
22 #include "impeller/renderer/threadgroup_sizing_test.comp.h"
// Obtain the context under test and require that its capabilities report
// compute support before anything else is attempted.
30 auto context = GetContext();
32 ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
// Runs SampleComputeShader over kCount elements with both the grid and the
// thread group set to kCount x 1, then checks every output element against a
// value recomputed on the host.
36 using CS = SampleComputeShader;
37 auto context = GetContext();
39 ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
// Resolve the default pipeline descriptor into a pipeline; the test stops
// here if either the descriptor or the pipeline is missing.
43 SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
44 ASSERT_TRUE(pipeline_desc.has_value());
45 auto compute_pipeline =
46 context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
47 ASSERT_TRUE(compute_pipeline);
// The compute pass is created from a fresh command buffer and must be valid.
49 auto cmd_buffer = context->CreateCommandBuffer();
50 auto pass = cmd_buffer->CreateComputePass();
51 ASSERT_TRUE(pass && pass->IsValid());
53 static constexpr
size_t kCount = 5;
// Grid size and thread group size are both kCount wide and 1 high.
55 pass->SetGridSize(
ISize(kCount, 1));
56 pass->SetThreadGroupSize(
ISize(kCount, 1));
// Host-side inputs: input_0 varies with the index, input_1 is the same
// vector (6, 7, 8, 9) for every element.
61 CS::Info info{.count = kCount};
62 CS::Input0<kCount> input_0;
63 CS::Input1<kCount> input_1;
64 for (
size_t i = 0; i < kCount; i++) {
65 input_0.elements[i] =
Vector4(2.0 + i, 3.0 + i, 4.0 + i, 5.0 * i);
66 input_1.elements[i] =
Vector4(6.0, 7.0, 8.0, 9.0);
69 input_0.fixed_array[1] =
IPoint32(2, 2);
72 input_1.some_struct = CS::SomeStruct{.vf =
Point(3, 4), .i = 42};
// Buffer that receives the shader output; it is read back through
// AsBufferView() in the completion callback below.
74 auto output_buffer = CreateHostVisibleDeviceBuffer<CS::Output<kCount>>(
75 context,
"Output Buffer");
// The uniform and both storage inputs are copied into the pass's transients
// buffer; the output is bound directly to output_buffer.
77 CS::BindInfo(cmd, pass->GetTransientsBuffer().EmplaceUniform(info));
79 pass->GetTransientsBuffer().EmplaceStorageBuffer(input_0));
81 pass->GetTransientsBuffer().EmplaceStorageBuffer(input_1));
82 CS::BindOutput(cmd, output_buffer->AsBufferView());
// Queue the single command, then encode the pass.
84 ASSERT_TRUE(pass->AddCommand(std::move(cmd)));
85 ASSERT_TRUE(pass->EncodeCommands());
// latch and input_0 are captured by reference by the completion callback;
// output_buffer is captured by value.
87 fml::AutoResetWaitableEvent latch;
89 cmd_buffer->SubmitCommands([&latch, output_buffer, &input_0,
91 EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
// The view must span exactly one CS::Output<kCount>, whose bytes are then
// reinterpreted as that struct.
93 auto view = output_buffer->AsBufferView();
94 EXPECT_EQ(view.range.length, sizeof(CS::Output<kCount>));
96 CS::Output<kCount>* output =
97 reinterpret_cast<CS::Output<kCount>*>(view.contents);
// Expected element: input_0[i] * input_1[i], with per-component offsets
// 2 + some_struct.i, 3 + some_struct.vf.x and 5 + some_struct.vf.y.
// NOTE(review): the literals 2, 3 and 5 presumably mirror values used by the
// shader itself - confirm against the shader source.
99 for (size_t i = 0; i < kCount; i++) {
100 Vector4 vector = output->elements[i];
101 Vector4 computed = input_0.elements[i] * input_1.elements[i];
102 EXPECT_EQ(vector, Vector4(computed.x + 2 + input_1.some_struct.i,
103 computed.y + 3 + input_1.some_struct.vf.x,
104 computed.z + 5 + input_1.some_struct.vf.y,
// Feeds the values 1..kCount to PrefixSumTestComputeShader and expects the
// output to hold their running totals.
114 using CS = PrefixSumTestComputeShader;
115 auto context = GetContext();
116 ASSERT_TRUE(context);
117 ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
// Resolve the default pipeline descriptor into a pipeline; both steps must
// succeed for the test to continue.
121 SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
122 ASSERT_TRUE(pipeline_desc.has_value());
123 auto compute_pipeline =
124 context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
125 ASSERT_TRUE(compute_pipeline);
127 auto cmd_buffer = context->CreateCommandBuffer();
128 auto pass = cmd_buffer->CreateComputePass();
129 ASSERT_TRUE(pass && pass->IsValid());
131 static constexpr
size_t kCount = 5;
// Grid size and thread group size are both kCount wide and 1 high.
133 pass->SetGridSize(
ISize(kCount, 1));
134 pass->SetThreadGroupSize(
ISize(kCount, 1));
// Input is the sequence 1, 2, ..., kCount (data[i] = 1 + i).
139 CS::InputData<kCount> input_data;
140 input_data.count = kCount;
141 for (
size_t i = 0; i < kCount; i++) {
142 input_data.data[i] = 1 + i;
145 auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
146 context,
"Output Buffer");
// The input is copied into the pass's transients buffer; the output is bound
// directly to output_buffer.
149 cmd, pass->GetTransientsBuffer().EmplaceStorageBuffer(input_data));
150 CS::BindOutputData(cmd, output_buffer->AsBufferView());
152 ASSERT_TRUE(pass->AddCommand(std::move(cmd)));
153 ASSERT_TRUE(pass->EncodeCommands());
155 fml::AutoResetWaitableEvent latch;
156 ASSERT_TRUE(cmd_buffer->SubmitCommands(
158 EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
// The view must span exactly one CS::OutputData<kCount>, whose bytes are
// then reinterpreted as that struct.
160 auto view = output_buffer->AsBufferView();
161 EXPECT_EQ(view.range.length, sizeof(CS::OutputData<kCount>));
163 CS::OutputData<kCount>* output =
164 reinterpret_cast<CS::OutputData<kCount>*>(view.contents);
// Running totals of 1..5: 1, 1+2, 1+2+3, 1+2+3+4, 1+2+3+4+5.
167 constexpr uint32_t expected[kCount] = {1, 3, 6, 10, 15};
168 for (
size_t i = 0; i < kCount; i++) {
169 auto computed_sum = output->data[i];
170 EXPECT_EQ(computed_sum, expected[i]);
// Dispatches ThreadgroupSizingTestComputeShader with a requested grid and
// thread group of 2048 x 1 and checks the last output slot.
179 using CS = ThreadgroupSizingTestComputeShader;
180 auto context = GetContext();
181 ASSERT_TRUE(context);
182 ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
// Resolve the default pipeline descriptor into a pipeline; both steps must
// succeed for the test to continue.
186 SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
187 ASSERT_TRUE(pipeline_desc.has_value());
188 auto compute_pipeline =
189 context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
190 ASSERT_TRUE(compute_pipeline);
192 auto cmd_buffer = context->CreateCommandBuffer();
193 auto pass = cmd_buffer->CreateComputePass();
194 ASSERT_TRUE(pass && pass->IsValid());
196 static constexpr
size_t kCount = 2048;
// NOTE(review): a 2048-wide thread group presumably exceeds what some
// devices allow, so this looks intended to exercise size clamping inside the
// pass - confirm against the compute pass implementation.
198 pass->SetGridSize(
ISize(kCount, 1));
199 pass->SetThreadGroupSize(
ISize(kCount, 1));
// Only an output binding is visible for this shader; no input buffer is
// bound in the lines shown.
204 auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
205 context,
"Output Buffer");
207 CS::BindOutputData(cmd, output_buffer->AsBufferView());
209 ASSERT_TRUE(pass->AddCommand(std::move(cmd)));
210 ASSERT_TRUE(pass->EncodeCommands());
212 fml::AutoResetWaitableEvent latch;
213 ASSERT_TRUE(cmd_buffer->SubmitCommands(
215 EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
217 auto view = output_buffer->AsBufferView();
218 EXPECT_EQ(view.range.length, sizeof(CS::OutputData<kCount>));
220 CS::OutputData<kCount>* output =
221 reinterpret_cast<CS::OutputData<kCount>*>(view.contents);
// Only the final slot is inspected: it must hold its own index, kCount - 1.
223 EXPECT_EQ(output->data[kCount - 1], kCount - 1);
// Playground variant of the prefix-sum dispatch: the compute work is issued
// from a callback handed to OpenPlaygroundHere, and the result buffer is not
// read back in the lines shown.
231 using CS = PrefixSumTestComputeShader;
233 auto context = GetContext();
234 ASSERT_TRUE(context);
235 ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
// The callback captures by reference ([&]) and returns a bool; the
// render_pass parameter is not used in any of the lines shown.
237 auto callback = [&](
RenderPass& render_pass) ->
bool {
// NOTE(review): no check of pipeline_desc, compute_pipeline or pass is
// visible inside the callback, unlike the other dispatches in this file.
240 SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
241 auto compute_pipeline =
242 context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
244 auto cmd_buffer = context->CreateCommandBuffer();
245 auto pass = cmd_buffer->CreateComputePass();
247 static constexpr
size_t kCount = 1023;
// Only the grid size is set in the lines shown.
249 pass->SetGridSize(
ISize(kCount, 1));
// Input is the sequence 1, 2, ..., kCount (data[i] = 1 + i).
254 CS::InputData<kCount> input_data;
255 input_data.count = kCount;
256 for (
size_t i = 0; i < kCount; i++) {
257 input_data.data[i] = 1 + i;
260 auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
261 context,
"Output Buffer");
264 cmd, pass->GetTransientsBuffer().EmplaceStorageBuffer(input_data));
265 CS::BindOutputData(cmd, output_buffer->AsBufferView());
// NOTE(review): the results of AddCommand and EncodeCommands are discarded
// here; only the SubmitCommands result reaches the caller, so a failure to
// add or encode the command would go unreported by this callback.
267 pass->AddCommand(std::move(cmd));
268 pass->EncodeCommands();
269 return cmd_buffer->SubmitCommands();
// The test passes when the playground reports success for the callback.
271 ASSERT_TRUE(OpenPlaygroundHere(callback));
// Two-command compute pass: the first command writes output_buffer_1 and the
// second command is bound to read that same buffer and write
// output_buffer_2. Both buffers are verified once the command buffer
// completes.
275 using CS1 = Stage1ComputeShader;
277 using CS2 = Stage2ComputeShader;
280 auto context = GetContext();
281 ASSERT_TRUE(context);
282 ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
// Each stage gets its own pipeline; every descriptor and pipeline is checked.
284 auto pipeline_desc_1 =
285 Stage1PipelineBuilder::MakeDefaultPipelineDescriptor(*context);
286 ASSERT_TRUE(pipeline_desc_1.has_value());
287 auto compute_pipeline_1 =
288 context->GetPipelineLibrary()->GetPipeline(pipeline_desc_1).Get();
289 ASSERT_TRUE(compute_pipeline_1);
291 auto pipeline_desc_2 =
292 Stage2PipelineBuilder::MakeDefaultPipelineDescriptor(*context);
293 ASSERT_TRUE(pipeline_desc_2.has_value());
294 auto compute_pipeline_2 =
295 context->GetPipelineLibrary()->GetPipeline(pipeline_desc_2).Get();
296 ASSERT_TRUE(compute_pipeline_2);
// Both commands share one compute pass on one command buffer.
298 auto cmd_buffer = context->CreateCommandBuffer();
299 auto pass = cmd_buffer->CreateComputePass();
300 ASSERT_TRUE(pass && pass->IsValid());
// Stage 1 consumes kCount1 elements; both output buffers are sized for
// kCount2, which is twice kCount1.
302 static constexpr
size_t kCount1 = 5;
303 static constexpr
size_t kCount2 =
kCount1 * 2;
// Grid and thread group are a fixed 512 x 1 rather than being derived from
// the element counts.
305 pass->SetGridSize(
ISize(512, 1));
306 pass->SetThreadGroupSize(
ISize(512, 1));
// Stage 1 input holds the indices 0..kCount1-1.
308 CS1::Input<kCount1> input_1;
310 for (
size_t i = 0; i <
kCount1; i++) {
311 input_1.elements[i] = i;
// NOTE(review): input_2 is filled here but is not bound to any command in
// the lines shown - it looks unused; confirm against the full file.
314 CS2::Input<kCount2> input_2;
315 input_2.count = kCount2;
316 for (
size_t i = 0; i < kCount2; i++) {
317 input_2.elements[i] = i;
320 auto output_buffer_1 = CreateHostVisibleDeviceBuffer<CS1::Output<kCount2>>(
321 context,
"Output Buffer Stage 1");
322 auto output_buffer_2 = CreateHostVisibleDeviceBuffer<CS2::Output<kCount2>>(
323 context,
"Output Buffer Stage 2");
// First command: input_1 (copied into the transients buffer) in,
// output_buffer_1 out.
330 pass->GetTransientsBuffer().EmplaceStorageBuffer(input_1));
331 CS1::BindOutput(cmd, output_buffer_1->AsBufferView());
333 ASSERT_TRUE(pass->AddCommand(std::move(cmd)));
// Second command: output_buffer_1 in, output_buffer_2 out.
// NOTE(review): the input is bound through CS1::BindInput while the output
// uses CS2::BindOutput; CS2::BindInput may have been intended - confirm that
// both shaders use the same input binding slot.
340 CS1::BindInput(cmd, output_buffer_1->AsBufferView());
341 CS2::BindOutput(cmd, output_buffer_2->AsBufferView());
342 ASSERT_TRUE(pass->AddCommand(std::move(cmd)));
345 ASSERT_TRUE(pass->EncodeCommands());
// latch and output_buffer_1 are captured by reference by the completion
// callback.
347 fml::AutoResetWaitableEvent latch;
348 ASSERT_TRUE(cmd_buffer->SubmitCommands([&latch, &output_buffer_1,
351 EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
// The first output buffer must report 10 elements with these exact values.
353 CS1::Output<kCount2>* output_1 = reinterpret_cast<CS1::Output<kCount2>*>(
354 output_buffer_1->AsBufferView().contents);
355 EXPECT_TRUE(output_1);
356 EXPECT_EQ(output_1->count, 10u);
357 EXPECT_THAT(output_1->elements,
358 ::testing::ElementsAre(0, 0, 2, 3, 4, 6, 6, 9, 8, 12));
// Every expected value in the second buffer is exactly twice the expected
// value at the same position in the first buffer.
360 CS2::Output<kCount2>* output_2 = reinterpret_cast<CS2::Output<kCount2>*>(
361 output_buffer_2->AsBufferView().contents);
362 EXPECT_TRUE(output_2);
363 EXPECT_EQ(output_2->count, 10u);
364 EXPECT_THAT(output_2->elements,
365 ::testing::ElementsAre(0, 0, 4, 6, 8, 12, 12, 18, 16, 24));
// Runs SampleComputeShader over kCount elements with only the grid size set
// (no SetThreadGroupSize call appears in the lines shown), then checks every
// output element against a value recomputed on the host.
374 using CS = SampleComputeShader;
375 auto context = GetContext();
376 ASSERT_TRUE(context);
377 ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
// Resolve the default pipeline descriptor into a pipeline; both steps must
// succeed for the test to continue.
381 SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
382 ASSERT_TRUE(pipeline_desc.has_value());
383 auto compute_pipeline =
384 context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
385 ASSERT_TRUE(compute_pipeline);
387 auto cmd_buffer = context->CreateCommandBuffer();
388 auto pass = cmd_buffer->CreateComputePass();
389 ASSERT_TRUE(pass && pass->IsValid());
391 static constexpr
size_t kCount = 5;
// Grid is kCount wide and 1 high.
393 pass->SetGridSize(
ISize(kCount, 1));
// Host-side inputs: input_0 varies with the index, input_1 is the same
// vector (6, 7, 8, 9) for every element.
398 CS::Info info{.count = kCount};
399 CS::Input0<kCount> input_0;
400 CS::Input1<kCount> input_1;
401 for (
size_t i = 0; i < kCount; i++) {
402 input_0.elements[i] =
Vector4(2.0 + i, 3.0 + i, 4.0 + i, 5.0 * i);
403 input_1.elements[i] =
Vector4(6.0, 7.0, 8.0, 9.0);
406 input_0.fixed_array[1] =
IPoint32(2, 2);
408 input_0.some_int = 5;
409 input_1.some_struct = CS::SomeStruct{.vf =
Point(3, 4), .i = 42};
// Buffer that receives the shader output; it is read back through
// AsBufferView() in the completion callback below.
411 auto output_buffer = CreateHostVisibleDeviceBuffer<CS::Output<kCount>>(
412 context,
"Output Buffer");
// The uniform and both storage inputs are copied into the pass's transients
// buffer; the output is bound directly to output_buffer.
414 CS::BindInfo(cmd, pass->GetTransientsBuffer().EmplaceUniform(info));
416 pass->GetTransientsBuffer().EmplaceStorageBuffer(input_0));
418 pass->GetTransientsBuffer().EmplaceStorageBuffer(input_1));
419 CS::BindOutput(cmd, output_buffer->AsBufferView());
421 ASSERT_TRUE(pass->AddCommand(std::move(cmd)));
422 ASSERT_TRUE(pass->EncodeCommands());
// latch and input_0 are captured by reference by the completion callback;
// output_buffer is captured by value.
424 fml::AutoResetWaitableEvent latch;
426 cmd_buffer->SubmitCommands([&latch, output_buffer, &input_0,
428 EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
430 auto view = output_buffer->AsBufferView();
431 EXPECT_EQ(view.range.length, sizeof(CS::Output<kCount>));
433 CS::Output<kCount>* output =
434 reinterpret_cast<CS::Output<kCount>*>(view.contents);
// Expected element: input_0[i] * input_1[i], with per-component offsets
// 2 + some_struct.i, 3 + some_struct.vf.x and 5 + some_struct.vf.y.
// NOTE(review): the literals 2, 3 and 5 presumably correspond to
// fixed_array / some_int as consumed by the shader - confirm against the
// shader source.
436 for (size_t i = 0; i < kCount; i++) {
437 Vector4 vector = output->elements[i];
438 Vector4 computed = input_0.elements[i] * input_1.elements[i];
439 EXPECT_EQ(vector, Vector4(computed.x + 2 + input_1.some_struct.i,
440 computed.y + 3 + input_1.some_struct.vf.x,
441 computed.z + 5 + input_1.some_struct.vf.y,
// Negative case: with a zero-width grid and thread group, a fully bound
// command can still be added to the pass, but encoding the pass is expected
// to fail.
451 using CS = SampleComputeShader;
452 auto context = GetContext();
453 ASSERT_TRUE(context);
454 ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
// Resolve the default pipeline descriptor into a pipeline; both steps must
// succeed for the test to continue.
458 SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
459 ASSERT_TRUE(pipeline_desc.has_value());
460 auto compute_pipeline =
461 context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
462 ASSERT_TRUE(compute_pipeline);
464 auto cmd_buffer = context->CreateCommandBuffer();
465 auto pass = cmd_buffer->CreateComputePass();
466 ASSERT_TRUE(pass && pass->IsValid());
468 static constexpr
size_t kCount = 5;
// Width is deliberately 0 for both the grid and the thread group; this is
// the condition under test.
472 pass->SetGridSize(
ISize(0, 1));
473 pass->SetThreadGroupSize(
ISize(0, 1));
// Inputs and bindings are populated the same way as in the successful
// SampleComputeShader dispatches, so the zero dimension is the only
// abnormal part of the setup in the lines shown.
478 CS::Info info{.count = kCount};
479 CS::Input0<kCount> input_0;
480 CS::Input1<kCount> input_1;
481 for (
size_t i = 0; i < kCount; i++) {
482 input_0.elements[i] =
Vector4(2.0 + i, 3.0 + i, 4.0 + i, 5.0 * i);
483 input_1.elements[i] =
Vector4(6.0, 7.0, 8.0, 9.0);
486 input_0.fixed_array[1] =
IPoint32(2, 2);
488 input_0.some_int = 5;
489 input_1.some_struct = CS::SomeStruct{.vf =
Point(3, 4), .i = 42};
491 auto output_buffer = CreateHostVisibleDeviceBuffer<CS::Output<kCount>>(
492 context,
"Output Buffer");
494 CS::BindInfo(cmd, pass->GetTransientsBuffer().EmplaceUniform(info));
496 pass->GetTransientsBuffer().EmplaceStorageBuffer(input_0));
498 pass->GetTransientsBuffer().EmplaceStorageBuffer(input_1));
499 CS::BindOutput(cmd, output_buffer->AsBufferView());
// Adding the command succeeds; the rejection is asserted at encode time.
501 ASSERT_TRUE(pass->AddCommand(std::move(cmd)));
502 ASSERT_FALSE(pass->EncodeCommands());