Merge pull request codeplaysoftware#1 from codeplaysoftware/feb_updates

Making some minor changes and corrections to the presentations Lesson 1 slide 2 - bullets mis-aligned, removed apostrophe slide 19 - highlighted the close bracket }); slide 22 - scope is wrong here, corrected slide 23 - added new line after first point Lesson 2 slide 16 - highlight parallel_for slide 23 - missing language restrictions, corrected and added a note pointing to blog posts on some of these topics slide 26 - made it more succinct Lesson 3 slide 6 - added new bullet point - This can often resemble incorrect results slide 17 - added further information Lesson 4 slide 2 - updated the text slide 9 - made more succinct slide 30 - updated and added new slide
blasvicco · Mar 6, 2020 · a897529 · a897529
2 parents 63c07d0 + 40e6a42
commit a897529
Show file tree

Hide file tree

Showing 4 changed files with 73 additions and 29 deletions.
diff --git a/Lesson_Materials/Lesson-1-Introduction-to-SYCL/index.html b/Lesson_Materials/Lesson-1-Introduction-to-SYCL/index.html
@@ -22,13 +22,12 @@
 				</section>
 				<!--Slide 2-->
 				<section class="hbox" data-markdown>
-				    ## Learning Objectives
-
-		            * Learn about the SYCL 1.2.1 specification and it's implementations 
+					## Learning Objectives
+					* Learn about the SYCL 1.2.1 specification and its implementations 
 					* Learn about the major features that SYCL provides 
 					* Learn about the components of a SYCL implementation 
 					* Learn about the anatomy of a typical SYCL application 
-					* Learn where to find useful resources for SYCL
+					* Learn where to find useful resources for SYCL 
                 </section>
 				<!--Slide 3-->
 				<section class="hbox" data-markdown>
@@ -218,8 +217,8 @@
 					<div class="hbox">
 						<code><pre>
 
-<mark>#include &ltCL/sycl.hpp&gt </mark>
-using namespace cl::sycl;
+<mark>#include &ltCL/sycl.hpp&gt 
+using namespace cl::sycl;</mark>
 class add;
 
 int main(int argc, char *argv[]) { 
@@ -299,7 +298,7 @@
 
 
 
-  });
+  <mark>});</mark>
 }
 						</code></pre>
 					</div>
@@ -398,18 +397,19 @@
   std::vector<float> dA{ … }, dB{ … }, dO{ … };
 
   queue gpuQueue{gpu_selector{}};
+<mark>  {</mark>
+    buffer<float, 1> bufA(dA.data(), range<1>(dA.size())); 
+    buffer<float, 1> bufB(dB.data(), range<1>(dB.size())); 
+    buffer<float, 1> bufO(dO.data(), range<1>(dO.size()));
 
-  buffer<float, 1> bufA(dA.data(), range<1>(dA.size())); 
-  buffer<float, 1> bufB(dB.data(), range<1>(dB.size())); 
-  buffer<float, 1> bufO(dO.data(), range<1>(dO.size()));
-
-  gpuQueue.submit([&](handler &cgh)<mark>{</mark>
+    gpuQueue.submit([&](handler &cgh){
 
 
 
 
 
-  <mark>});</mark>
+    });
+  <mark>}</mark>
 }
 						</code></pre>
 					</div>
@@ -449,8 +449,8 @@
 						</code></pre>
 					</div>
 					<div class="bottom-bullets" data-markdown>
-						We create an accessor for each of the buffers
-						Read access for the two input buffers and write access for the output buffer
+						 * We create an accessor for each of the buffers
+						 * Read access for the two input buffers and write access for the output buffer
 					</div>
 				</section>
 				<!--Slide 24-->

diff --git a/Lesson_Materials/Lesson-2-Launching-SYCL-Kernels/index.html b/Lesson_Materials/Lesson-2-Launching-SYCL-Kernels/index.html
@@ -217,7 +217,7 @@
 
   gpuQueue.submit([&](handler &cgh){
 
-    cgh.parallel_for&ltadd&gt(range&lt1&gt(1024), 
+    cgh.<mark>parallel_for</mark>&ltadd&gt(range&lt1&gt(1024), 
       [=](id&lt1&gt i) { 
       // kernel code }); 
   }); 
@@ -430,7 +430,19 @@
 						* Kernel Function Naming
 						  * SYCL kernel functions declared with a lambda must be named using a forward declarable C++ type, declared in global scope 
 						  * SYCL kernel function names follow C++ ODR rules, which means you cannot have two kernels with the same name
+						* Language restrictions for SYCL kernel functions:
+						  * No dynamic allocation
+						  * No dynamic polymorphism
+						  * No recursion
+						  * No function pointers
+						  * No exception handling
+						  * No RTTI
 					</div>
+					<aside class="notes">
+						Note that there are ways to get around some of these restrictions. For example there are blog posts offering alternative ways to achieve function pointers and dynamic polymorphism using standard C++ code.
+						https://www.codeplay.com/portal/09-24-19-alternatives-to-cpp-function-pointers-in-sycl-using-function-objects
+						https://www.codeplay.com/portal/07-12-19-enabling-polymorphism-in-sycl-using-the-cpp-idiom-crtp
+					</aside>
 				</section>
 				<!--Slide 23-->
 				<section>
@@ -554,7 +566,7 @@
 };
 						</code></pre>
 
-							To use a C++ function object you simply construct an instance of the type initialising the accessors and pass it to parallel_for<br><br>
+							To use a C++ function object you simply construct an instance of the type initializing the accessors and pass it to parallel_for<br><br>
 
 							Notice you no longer need to name the SYCL kernel
 						</div>

diff --git a/Lesson_Materials/Lesson-3-Handling-SYCL-Errors/index.html b/Lesson_Materials/Lesson-3-Handling-SYCL-Errors/index.html
@@ -77,6 +77,7 @@
 					</div>
 					<div class="bottom-bullets" data-markdown>
 							* If errors are not handled, the application can fail silently
+							  * This can often resemble incorrect results
 					</div>
 				</section>
 				<!--Slide 7-->
@@ -312,6 +313,7 @@
 						<div class="bottom-bullets" data-markdown>
 							* Any SYCL application can be debugged on the host device by switching the queue for a host queue
 							* Replacing the device selector with the host_selector will ensure that the queue submits all work to the host device
+							* Be aware this could have an impact on data copies as data would no longer be required to be copied to the original device
 						</div>
 				</section>
 			</div>

diff --git a/Lesson_Materials/Lesson-4-Managing-Data-in-SYCL-Applications/index.html b/Lesson_Materials/Lesson-4-Managing-Data-in-SYCL-Applications/index.html
@@ -28,7 +28,6 @@
 					* Learn about different ways to access the data
 					* Learn about execution ordering using data dependencies
 					* Learn about how SYCL synchronizes data
-					* Understand the SPMD model of describing parallelism
 				</section>
 				<!--Slide 3-->
 				<section class="hbox" data-markdown>
@@ -95,10 +94,11 @@
 				<section>
 					<div class="hbox" data-markdown>
 						* SYCL separates the storage and access of data 
-						  * A SYCL buffer manages data across the host and any number of devices 
-						  * A SYCL accessor requests access to data on the host or on a device for a specific SYCL kernel function
+						  * A SYCL buffer manages data across the host and devices 
+						  * A SYCL accessor requests access to data for a specific kernel
 						* Accessors are also used to access data within a SYCL kernel function
-						  * This means they are declared in the host code but captured by and then accessed within a SYCL kernel function
+						  * This means they are declared in the host code 
+						  * But captured by and then accessed within a SYCL kernel
 					</div>
 				</section>
 				<!--Slide 10-->
@@ -345,16 +345,16 @@
 					<div class="container">
 						<div class="col-left-3">
 							<code><pre>
-buffer&ltfloat, 1&gt bufA(dA.data(), range&lt1&gt(dA.size())); 
-buffer&ltfloat, 1&gt bufB(dB.data(), range&lt1&gt(dB.size())); 
-buffer&ltfloat, 1&gt bufO(dO.data(), range&lt1&gt(dO.size()));
+buffer&ltfloat, 3&gt bufA(dA, rng); 
+buffer&ltfloat, 3&gt bufB(dB, rng); 
+buffer&ltfloat, 3&gt bufO(dO, rng);
 
 gpuQueue.submit([&](handler &cgh){
   auto inA = bufA.get_access&ltaccess::mode::read&gt(cgh);
   auto inB = bufB.get_access&ltaccess::mode::read&gt(cgh);
   auto out = bufO.get_access&ltaccess::mode::write&gt(cgh);
   cgh.parallel_for&ltadd&gt(range&lt1&gt(dA.size()), 
-    [=](id&lt1&gt i){ 
+    [=](id&lt3&gt i){ 
     <mark>out[i] = inA[i] + inB[i];</mark>
   });
 });
@@ -370,15 +370,45 @@
 					<div class="container">
 						<div class="col-left-3">
 							<code><pre>
-buffer&ltfloat, 1&gt bufA(dA.data(), range&lt1&gt(dA.size())); 
-buffer&ltfloat, 1&gt bufB(dB.data(), range&lt1&gt(dB.size())); 
-buffer&ltfloat, 1&gt bufO(dO.data(), range&lt1&gt(dO.size()));
+buffer&ltfloat, 3&gt bufA(dA, rng); 
+buffer&ltfloat, 3&gt bufB(dB, rng); 
+buffer&ltfloat, 3&gt bufO(dO, rng);
+
+gpuQueue.submit([&](handler &cgh){
+  auto inA = bufA.get_access&ltaccess::mode::read&gt(cgh);
+  auto inB = bufB.get_access&ltaccess::mode::read&gt(cgh);
+  auto out = bufO.get_access&ltaccess::mode::write&gt(cgh);
+  cgh.parallel_for&ltadd&gt(range&lt1&gt(dA.size()), 
+    [=](id&lt3&gt i){ 
+    <mark>auto x = i[0];
+    auto y = i[1];
+    auto z = i[2];
+    out[z][y][x] = inA[z][y][x] + inB[z][y][x];</mark>
+  });
+});
+							</code></pre>
+						</div>
+						<div class="col-right-2" data-markdown>
+							* Here we access the data of the accessor by passing in the index at each dimension in nested subscript operators
+						</div>
+					</div>
+				</section>
+
+
+				<!--Slide 31-->
+				<section>
+					<div class="container">
+						<div class="col-left-3">
+							<code><pre>
+buffer&ltfloat, 3&gt bufA(dA, rng); 
+buffer&ltfloat, 3&gt bufB(dB, rng); 
+buffer&ltfloat, 3&gt bufO(dO, rng);
 
 gpuQueue.submit([&](handler &cgh){
   auto inA = bufA.get_access&ltaccess::mode::read&gt(cgh);
   auto inB = bufB.get_access&ltaccess::mode::read&gt(cgh);
   auto out = bufO.get_access&ltaccess::mode::write&gt(cgh);
-  cgh.parallel_for&ltadd&gt(rng, [=](id&lt3&gt i){
+  cgh.parallel_for&ltadd&gt(rng, [=](id&lt3&gt i){
     <mark>auto ptrA = inA.get_pointer();</mark>
     <mark>auto ptrB = inB.get_pointer();</mark>
     <mark>auto ptrO = out.get_pointer();</mark>