From: Luke Lau Date: Mon, 20 Apr 2020 20:18:08 +0000 (+0100) Subject: Report X-Git-Url: https://git.lukelau.me/?p=clouds.git;a=commitdiff_plain;h=c8b0a84bbafd306103b005e7e238b11a656dcba3 Report --- diff --git a/clouds.cpp b/clouds.cpp index 5787181..b8ea3f0 100644 --- a/clouds.cpp +++ b/clouds.cpp @@ -169,8 +169,6 @@ const int shadeWidth = 256, shadeHeight = 256; #ifdef PBO const int numPbos = 512; GLuint pboBufs[numPbos]; -GLuint pboOffsets[numPbos]; // offsets into GL_PIXEL_PACK_BUFFER that - // mapPixelRead should read from GLbyte sink[shadeWidth * shadeHeight * 4]; void inline mapPixelRead(int pboBuf, int metaball) { @@ -178,8 +176,6 @@ void inline mapPixelRead(int pboBuf, int metaball) { GLubyte *src = (GLubyte *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, 4 * sizeof(GLubyte), GL_MAP_READ_BIT); - /* (GLubyte *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, pboOffsets[pboBuf], */ - /* 4 * sizeof(GLubyte), GL_MAP_READ_BIT); */ vec4 pixel = vec4(src[0], src[1], src[2], src[3]) / vec4(255.f); glUnmapBuffer(GL_PIXEL_PACK_BUFFER); @@ -274,10 +270,7 @@ void shadeClouds() { /* glReadPixels(screenPos.x, screenPos.y, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, */ /* NULL); */ - /* glReadPixels(0, 0, shadeWidth, shadeHeight, GL_RGBA, GL_UNSIGNED_BYTE, */ - /* NULL); */ glReadPixels(screenPos.x, screenPos.y, 64, 64, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - /* pboOffsets[pboIdx] = screenPos.x * 4 + screenPos.y * shadeWidth * 4; */ int nextPbo = (pboIdx + 1) % numPbos; if (i >= numPbos - 1) { diff --git a/report/.chktexrc b/report/.chktexrc new file mode 100644 index 0000000..3fbadf8 --- /dev/null +++ b/report/.chktexrc @@ -0,0 +1 @@ +VerbEnv { algorithmic } \ No newline at end of file diff --git a/report/.gitignore b/report/.gitignore new file mode 100644 index 0000000..3862985 --- /dev/null +++ b/report/.gitignore @@ -0,0 +1,8 @@ +*.pdf +*~ +*.aux +*.bbl +_region_* +*.log +*.blg +*.prv \ No newline at end of file diff --git a/report/attenuationTextures.png b/report/attenuationTextures.png new file 
mode 100644 index 0000000..16a779e Binary files /dev/null and b/report/attenuationTextures.png differ diff --git a/report/render0.png b/report/render0.png new file mode 100644 index 0000000..a8cd648 Binary files /dev/null and b/report/render0.png differ diff --git a/report/render1.png b/report/render1.png new file mode 100644 index 0000000..66a2c00 Binary files /dev/null and b/report/render1.png differ diff --git a/report/render2.png b/report/render2.png new file mode 100644 index 0000000..ca96541 Binary files /dev/null and b/report/render2.png differ diff --git a/report/report.bib b/report/report.bib new file mode 100644 index 0000000..c3b4e0d --- /dev/null +++ b/report/report.bib @@ -0,0 +1,29 @@ +@inproceedings{dobashi2000simple, + title={A simple, efficient method for realistic animation of clouds}, + author={Dobashi, Yoshinori and Kaneda, Kazufumi and Yamashita, Hideo and Okita, Tsuyoshi and Nishita, Tomoyuki}, + booktitle={Proceedings of the 27th annual conference on Computer graphics and interactive techniques}, + pages={19--28}, + year={2000} +} + +@inproceedings{dobashi1996display, author = {Nishita, Tomoyuki and Dobashi, Yoshinori and Nakamae, Eihachiro}, title = {Display of Clouds Taking into Account Multiple Anisotropic Scattering and Sky Light}, year = 1996, isbn = 0897917464, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/237170.237277}, doi = {10.1145/237170.237277}, booktitle = {Proceedings of the 23rd Annual Conference on Computer Graphics and Interactive Techniques}, pages = {379–386}, numpages = 8, keywords = {sky light, clouds, optical length, photo-realism, multiple scattering, participating media, radiative transfer}, series = {SIGGRAPH ’96} } + +@inproceedings{dobashi1998animation, + title={Animation of clouds using cellular automaton}, + author={Dobashi, Yoshinori and Nishita, Tomoyuki and Okita, Tsuyoshi}, + booktitle={Proceedings of Computer Graphics and Imaging}, + 
volume={98}, + pages={251--256}, + year={1998} +} + +@article{nagel1992self, + title={Self-organizing criticality in cloud formation?}, + author={Nagel, Kai and Raschke, Ehrhard}, + journal={Physica A: Statistical Mechanics and its Applications}, + volume={182}, + number={4}, + pages={519--531}, + year={1992}, + publisher={Elsevier} +} \ No newline at end of file diff --git a/report/report.tex b/report/report.tex new file mode 100644 index 0000000..bc3cb6e --- /dev/null +++ b/report/report.tex @@ -0,0 +1,376 @@ +\documentclass{article} + +\usepackage{listings} +\usepackage{algorithm} +\usepackage{algpseudocode} +\usepackage{graphicx} +\usepackage{amsmath} + +\title{An Implementation of a Simple, Efficient Method for Realistic + Animation of Clouds} +\author{Luke Lau} +\begin{document} +\maketitle + +\section{Abstract} +This report details the implementation of a method for rendering and +animating clouds by Dobashi et al.~\cite{dobashi2000simple} in +OpenGL. It currently implements the cellular automata, shading and +rendering parts as described in the paper, as well as additional +features for debugging and viewing the underlying process. + +\section{Background} +This paper is just one of many from Dobashi and Nishita's work into +the rendering of cloud and other atmospheric +effects~\cite{dobashi1996display,dobashi1998animation}. The process of +generating such clouds begins with cellular automata to simulate cloud +movement and growth. Whilst not physically accurate, it is remarkably +effective for how simple it is. + +\subsection{Cellular automata} +The method for cellular automata is an extension of earlier +work~\cite{nagel1992self}, extended with two extra rules. The clouds +are simulated on a 3D grid, with three binary variables for each cell: +Humidity, transition (written as +\textit{act} in the paper for activation) and clouds. The clouds variable +determines what actually gets rendered in the end.
The three rules that +operate on these variables are as follows: +\begin{enumerate} +\item[\bf Growth] Causes clouds to appear over time. +\item[\bf Extinction] Causes clouds to disappear over time. +\item[\bf Advection] Causes clouds to move over time (due to wind). +\end{enumerate} +In addition to the three variables, there are also ellipsoids of +probabilities generated for extinction, humidity and transition. These +are generated randomly at the beginning and are used for the +extinction and advection rules. They are static in that they do not +change over time, and simulate air parcels. + +After each time step in the cellular automata, the grid of binary +cloud values is converted to a grid of continuous densities, which +represent a field of metaballs of varying density. + +\subsection{Shading} +Before each metaball is rendered to give the final cloud image, the +amount (and colour) of light reaching it needs to be +calculated. Because other cloud ``particles'' can obscure and scatter +the light, this needs to be calculated individually. The general +approach taken by the paper is to start with a blank white buffer, +projected \textit{orthogonally} from the sun's point of view, and +starting from the closest metaball, multiply the attenuation ratios +onto the buffer. This slowly blots out the frame, blocking out the +light as more and more metaballs are added. Each time a metaball's +attenuation ratio is multiplied onto the buffer, the centre pixel of +that metaball is read, multiplied by the colour of the sun, and then +stored to be used later in the rendering step. + +The attenuation ratios are stored as textures like the ones shown in +Figure~\ref{fig:textures}. These are different for each density, but +because the attenuation ratio isn't proportional to the density, a +discrete number of textures (64 for this paper) are generated +instead. Then the attenuation ratio texture closest to each metaball's +density can be selected.
+These textures need to be precalculated beforehand, just once. + +The metaballs are rendered as billboards: flat surfaces that are +always orientated towards the camera. To multiply the attenuation +ratios, the OpenGL \texttt{GL\_MODULATE} texture blend parameter is +used. + +\subsection{Rendering} +Once the colour of light reaching each metaball has been calculated, +the metaballs are then rendered from the camera's perspective, onto +the buffer that the user will actually see. The buffer is cleared with +the colour of the sky, and then beginning with the metaball furthest +from the camera, each metaball is again rendered as a billboard using +the attenuation ratio texture. This time, the textures are blended +instead of modulated. This is not to be confused with the +fragment blending that occurs with \texttt{GL\_BLEND}. + +\section{Implementation} +\subsection{Cellular automata} +The implementation of the cellular automata simulation +was the most straightforward part of the process. I did not attempt to +implement it with bitwise operations --- the cost of simulation with +8-bit booleans was negligible given the other costs described later +on. The paper states that the humidity and activation fields are set +``randomly'', but then delegates the details to~\cite{nagel1992self}, +which I was unable to access. Therefore I took some artistic liberties +and chose a non-uniform probability of 0.01 and 0.005 +respectively. + +\subsection{Metaballs} +Once the binary cells are converted to a grid of continuous densities, +these needed to specify a set of metaballs. There was a bit of +ambiguity here as the paper just states ``the continuous density +distribution is expressed by a set of metaballs'', so from the +implementation in~\cite{dobashi1998animation} I interpreted this as a +fixed grid of metaballs at each point, assigning a density to each one.
+One of the biggest challenges I faced when interpreting the paper was +figuring out how the billboard textures are precalculated. The paper +provides a vague diagram stating that both the attenuation ratio and +cumulative density are stored, but doesn't explain how to calculate +the former. I can only assume that it must be possible to calculate +this in a physically accurate way. Again, artistic liberties were +taken here in Listing~\ref{lst:textures} to guess a method for +generating the result in Figure~\ref{fig:textures}. + +\begin{figure} + \centering + \includegraphics[width=2in]{attenuationTextures}~ + \includegraphics[width=2in]{shadowMap} + \caption{Attenuation ratio textures for varying densities, and the + shadow map produced as a byproduct of shading.}\label{fig:textures} +\end{figure} + + +\begin{lstlisting}[language=C,basicstyle=\footnotesize,float,caption=The + method used for generating attenuation ratio textures,label={lst:textures}] +float data[32 * 32]; +for (int j = 0; j < 32; j++) { + for (int i = 0; i < 32; i++) { + // TODO: properly calculate this + // instead of whatever this is + float r = distance(vec2(i, j), vec2(16, 16)) / 16; + float density = (float)d / NQ; + float x = (3 * density * (metaballField(r) / normFactor)); + data[i + j * 32] = 1 - fmin(1, x); + } +} +\end{lstlisting} + +\subsection{Shading} +The process of displaying a frame is given in the pseudocode of +Algorithm~\ref{alg:display}. The first main procedure that occurs is +shading the clouds, which calculates the colour of the light reaching +each metaball. + +\subsubsection{Billboards} +As mentioned earlier, the metaballs are rendered as billboards that +always face the camera. This can be done quite easily, by just copying +the orientation part of the view matrix onto the orientation part of +the model matrix. That way, the metaball's orientation matches +exactly that of the camera's.
+\begin{align*} + \mathit{view} &= \begin{bmatrix} + v_{00} & v_{01} & v_{02} & v_{03} \\ + v_{10} & v_{11} & v_{12} & v_{13} \\ + v_{20} & v_{21} & v_{22} & v_{23} \\ + v_{30} & v_{31} & v_{32} & v_{33} + \end{bmatrix} && + \mathit{model} &= \begin{bmatrix} + v_{00} & v_{01} & v_{02} & dx \\ + v_{10} & v_{11} & v_{12} & dy \\ + v_{20} & v_{21} & v_{22} & dz \\ + 0 & 0 & 0 & 1 \\ + \end{bmatrix} +\end{align*} + +\subsubsection{Blending} +\texttt{glBlendFunc} controls how the +fragment shader output and the existing buffer fragment are blended +together before writing to the buffer. Whilst shading, \texttt{GL\_ZERO} +is set for the fragment shader and \texttt{GL\_SRC\_ALPHA} is set for +the buffer. This means that the final fragment written is calculated +as: +\[ + \mathit{bufferFrag} \gets \mathit{shaderFrag} * 0 + \mathit{bufferFrag} * \mathit{shaderFrag.alpha} +\] +The effect of this is that the original buffer of RGBA(1,1,1,1) is +gradually blotted out, as the original white colour is multiplied away +by the attenuation ratios' alpha. + +\subsubsection{Texture modes} +The paper specifies that the texture mode should be set to +\texttt{GL\_MODULATE} with \texttt{glTexEnv}. After trying this out, +my program kept on crashing. As it turns out, this is part of the old +fixed function pipeline. My implementation was shader based, so I +could no longer use this texture mode. Thankfully, the formulae used +for the texture modes are listed in the Man pages, and the logic could +be reimplemented in the shader, shown in Listing~\ref{lst:shader}.
+\begin{lstlisting}[language=C,basicstyle=\footnotesize,float,caption={Logic + for old fixed-function pipeline texture modes, reimplemented in the + shader},label={lst:shader}] +// Cf = color from fragment, Ct = color from texture +// Cc = color from texture environment +// not set, defaults to (0,0,0,0) +// Af = alpha from fragment, At = alpha from texture +// C = output color, A = output alpha +float f = texture(tex, texCoord).r; +if (modulate) { + // GL_MODULATE: C + // C = Cf * Ct + // A = Af * At + // the +0.02 is a hack to get lighter clouds! + // can be thought of as ambient light + FragColor = color * (f + 0.02); +} else { + // GL_BLEND: + // C = Cf * (1-Ct) + Cc * Ct + // A = Af * At + vec3 C = color.rgb * (1 - f); + float A = color.a * f; + FragColor = vec4(C, A); +} +\end{lstlisting} + +\begin{algorithm} + \caption{Rendering and displaying a frame}\label{alg:display} + \begin{algorithmic} + \Procedure{shadeClouds}{} + \State Sort metaballs in ascending distance from the sun + \State \Call{glUniform}{modulate, true} + \State \Call{glBlendFunc}{GL\_ZERO, GL\_SRC\_ALPHA} + \For{$k\gets \mathit{metaballs}$} + \State Rotate metaball to face the sun + \State \Call{glBindTexture}{textures[$\mathit{k.density}$]} + \State \Call{glDrawElements}{} + \State $c \gets \textrm{pixel at center of metaball}$ + \State $\mathit{k.col} \gets c * \textrm{colour of sun}$ + \EndFor + \State Bonus: Store the current framebuffer as a free shadow map + \EndProcedure + + \Procedure{renderClouds}{} + \State Sort metaballs in descending distance from the camera + \State \Call{glUniform}{modulate, false} + \State \Call{glBlendFunc}{GL\_ONE, GL\_SRC\_ALPHA} + \For{$k \gets \mathit{metaballs}$} + \State Rotate metaball to face the camera + \State \Call{glBindTexture}{textures[$k$.density]} + \State \Call{glUniform}{$\mathit{color}, \mathit{k.col}$} + \State \Call{glDrawElements}{} + \EndFor + \EndProcedure + + \Procedure{display}{} + \State $\mathit{projection} \gets $
\Call{orthographic}{} + \State $\mathit{view} \gets$ \Call{lookAt}{$\mathit{sunPos}, \mathit{sunPos} + \mathit{sunDir}$} + \State Clear buffer with RGBA(1,1,1,1) + \State \Call{shadeClouds}{} + \State $\mathit{projection} \gets $ \Call{perspective}{} + \State $\mathit{view} \gets$ \Call{lookAt}{$\mathit{camPos}, \mathit{camPos} + \mathit{camDir}$} + \State Clear buffer with sky colour + \State \Call{renderClouds}{} + \EndProcedure + \end{algorithmic} +\end{algorithm} + +\subsection{Rendering} +\subsubsection{Buffers} +The rendering process is similar to shading, in that we draw each +metaball onto a buffer. Originally the shading process happened in an +off-screen framebuffer so that the default framebuffer was +undisturbed, but since the rendering clears the buffer anyway and +draws on top of it, there was no need and it was eventually removed so +that everything happens in the one buffer. However caution was needed +to make sure that the orthographic projection in the shading process +was large enough, so that all the metaballs would fit in the same +dimensions as the frame it was being drawn to. + +\subsubsection{Blending} +The blending for rendering uses the equation: +\[ + \mathit{bufferFrag} \gets \mathit{shaderFrag} * 1 + \mathit{bufferFrag} * \mathit{shaderFrag.alpha} +\] +Unlike the shading process, the fragment from the shader actually gets +added into the buffer this time round. And likewise, the texture is no +longer modulated, so the uniform variable passed to the shader is updated. + +\subsection{Debug} +It would have been near impossible to implement this correctly if I +weren't able to visualize all the different steps in this +process. Different debug modes were added, which can be accessed by +typing the numbers 0--4 on the keyboard. Currently they are implemented +all together in the main shader, toggled by boolean uniforms passed +into the shader. 
This is pretty inefficient, as when not debugging the +shaders still need to pay the price for all the branching and checking. +They should be moved out into a separate shader and switched to whenever +a debug mode is entered. +\begin{table}[h] + \centering + \begin{tabular}{c|c} + \hline + \textbf{Key} & \textbf{Mode} \\ + \hline + 0 & Shaded \& rendered \\ + 1 & Cloud density \\ + 2 & Metaball shading \\ + 3 & Transition probabilities \\ + 4 & Extinction probabilities + \end{tabular} +\end{table} + +\section{Evaluation} +Despite this course being \textit{Real-time Rendering}, the rendering +process here was very much \textbf{not} real time. The authors of the +paper say that it took around 10 seconds to render a single frame for +a grid of $256\times128\times20$. On my machine in 2020, this took around 12 +seconds. Something was definitely wrong, so I profiled the program and +found that the vast majority of time was being spent inside +\texttt{shadeClouds}: namely waiting on the \texttt{glReadPixels} +call. \texttt{glReadPixels} forces synchronization of the entire GPU +pipeline and blocks until the GPU is finished drawing and has +transferred the requested buffer data back. This was happening for +every single metaball, killing performance entirely. For perspective, +about 1 second was spent doing the simulation and rendering. + +It isn't clear what exactly the authors used to read the pixel at each +metaball. But what was clear was that the pixel data wasn't needed +immediately. It's only needed for the rendering process, not the +shading. So I turned to pixel buffer objects (PBO) --- a buffer object +in OpenGL that can be used for \emph{asynchronous} pixel +transfers.
The general workflow for packing (downloading) the data +from the GPU asynchronously from within a loop looked like this: +\begin{lstlisting}[language=C,basicstyle=\footnotesize,breaklines=true] +glBindBuffer(GL_PIXEL_PACK_BUFFER, pboBufs[pboIdx]); +glReadPixels(screenPos.x, screenPos.y, 64, 64, GL_RGBA, +GL_UNSIGNED_BYTE, NULL); +glBindBuffer(GL_PIXEL_PACK_BUFFER, pboBufs[pboBuf + 1]); +GLubyte *pixel = (GLubyte *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, 4 * sizeof(GLubyte), GL_MAP_READ_BIT); +glUnmapBuffer(GL_PIXEL_PACK_BUFFER); +glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); +\end{lstlisting} +\texttt{glReadPixels} now +queued an asynchronous read to the PBO that was bound and returned +immediately. Meanwhile, we would map the buffer of \textit{another} +PBO that already had \texttt{glReadPixels} called on it, and so had +data ready for us. + +Because the time it takes to (issue the commands to) draw a metaball +is so short, a large buffer of 512 PBOs was set up to minimize the +time spent waiting on downloads to finish. Reading the pixels is +still the bottleneck, but it now only takes 2.8 seconds instead of 16. + +\section{Improvements} +Despite the nice speed up by using PBOs, the constant thrashing back +and forth the GPU is doing by reading and writing is undesirable. In +the future I would like to explore methods of storing the pixel values +somewhere on the GPU, and once all the metaballs are shaded, then +doing one single download to the CPU. I have discussed this with +people on the \#OpenGL channel on the Freenode IRC network, and +apparently it is possible to achieve something within a shader. + +Using bitfields and bitwise operations for the simulation would also +undoubtedly improve performance, and the last contribution of the +paper, shafts of light, remains to be implemented. + +Lastly, a lot of implementation details still remain unclear to +me.
Perhaps it was meant that two sets of textures were to be stored: +one for the attenuation ratios, and one for the cumulative densities? +Then the attenuation ratios would have been used for the shading part +whilst the cumulative densities would have been used for the rendering +part. But again, we would need to figure out how to calculate both in +the first place. + +\vspace{.2in} +\includegraphics[width=0.3\textwidth]{render0}~ +\includegraphics[width=0.3\textwidth]{render1}~ +\includegraphics[width=0.3\textwidth]{render2} + +\bibliographystyle{acm} +\bibliography{report} + +\end{document} \ No newline at end of file diff --git a/report/shadowMap.png b/report/shadowMap.png new file mode 100644 index 0000000..b4b6cc7 Binary files /dev/null and b/report/shadowMap.png differ