<html>
<head>
<title>Action2Motion: Conditioned Generation of 3D Human Motions</title>
<style type="text/css">
body {
font-family: "HelveticaNeue-Light", "Helvetica Neue Light", "Helvetica Neue", Helvetica, Arial, "Lucida Grande", sans-serif;
font-weight:300;
font-size:18px;
margin-left: auto;
margin-right: auto;
width: 1100px;
}
h1 {
font-weight:300;
}
.disclaimerbox {
background-color: #eee;
border: 1px solid #eeeeee;
border-radius: 10px ;
-moz-border-radius: 10px ;
-webkit-border-radius: 10px ;
padding: 20px;
}
video.header-vid {
height: 140px;
border: 1px solid black;
border-radius: 10px ;
-moz-border-radius: 10px ;
-webkit-border-radius: 10px ;
}
img.header-img {
height: 140px;
border: 1px solid black;
border-radius: 10px ;
-moz-border-radius: 10px ;
-webkit-border-radius: 10px ;
}
img.rounded {
border: 1px solid #eeeeee;
border-radius: 10px ;
-moz-border-radius: 10px ;
-webkit-border-radius: 10px ;
}
a:link,a:visited
{
color: #1367a7;
text-decoration: none;
}
a:hover {
color: #208799;
}
td.dl-link {
height: 160px;
text-align: center;
font-size: 22px;
}
.layered-paper-big { /* modified from: http://css-tricks.com/snippets/css/layered-paper/ */
box-shadow:
0px 0px 1px 1px rgba(0,0,0,0.35), /* The top layer shadow */
5px 5px 0 0px #fff, /* The second layer */
5px 5px 1px 1px rgba(0,0,0,0.35), /* The second layer shadow */
10px 10px 0 0px #fff, /* The third layer */
10px 10px 1px 1px rgba(0,0,0,0.35), /* The third layer shadow */
15px 15px 0 0px #fff, /* The fourth layer */
15px 15px 1px 1px rgba(0,0,0,0.35), /* The fourth layer shadow */
20px 20px 0 0px #fff, /* The fifth layer */
20px 20px 1px 1px rgba(0,0,0,0.35), /* The fifth layer shadow */
25px 25px 0 0px #fff, /* The fifth layer */
25px 25px 1px 1px rgba(0,0,0,0.35); /* The fifth layer shadow */
margin-left: 10px;
margin-right: 45px;
}
.layered-paper { /* modified from: http://css-tricks.com/snippets/css/layered-paper/ */
box-shadow:
0px 0px 1px 1px rgba(0,0,0,0.35), /* The top layer shadow */
5px 5px 0 0px #fff, /* The second layer */
5px 5px 1px 1px rgba(0,0,0,0.35), /* The second layer shadow */
10px 10px 0 0px #fff, /* The third layer */
10px 10px 1px 1px rgba(0,0,0,0.35); /* The third layer shadow */
margin-top: 5px;
margin-left: 10px;
margin-right: 30px;
margin-bottom: 5px;
}
.vert-cent {
position: relative;
top: 50%;
transform: translateY(-50%);
}
hr
{
border: 0;
height: 1.5px;
background-image: linear-gradient(to right, rgba(0, 0, 0, 0), rgba(0, 0, 0, 0.75), rgba(0, 0, 0, 0));
}
p.small {
font-size: 12px
}
</style>
</head>
<body>
<br>
<center>
<span style="font-size:36px">Action2Motion: Conditioned Generation of 3D Human Motions</span>
</center>
<br>
<table align=center width=900px>
<tr>
<td align=center width=100px>
<center>
<span style="font-size:20px"><a href="https://ericguo5513.github.io/">Chuan Guo<sup>1</sup></a></span>
</center>
</td>
<td align=center width=100px>
<center>
<span style="font-size:20px"><a href="https://sites.google.com/site/xinxinzuohome/home">Xinxin Zuo<sup>1,4</sup></a></span>
</center>
</td>
<td align=center width=100px>
<center>
<span style="font-size:20px"><a href="https://sites.google.com/site/senwang1312home/">Sen Wang<sup>1,4</sup></a></span>
</center>
</td>
<td align=center width=100px>
<center>
<span style="font-size:20px"><a href="https://jimmyzou.github.io/">Shihao Zou<sup>1</sup></a></span>
</center>
</td>
<td align=center width=100px>
<center>
<span style="font-size:20px">Qingyao Sun<sup>2</sup><sup>*</sup></a></span>
</center>
</td>
</tr>
</table>
<table align=center width=700px>
<tr>
<td align=center width=100px>
<center>
<span style="font-size:20px">Anan Deng<sup>3</sup><sup>*</sup></a></span>
</center>
</td>
<td align=center width=100px>
<center>
<span style="font-size:20px"><a href="http://www.cs.mun.ca/~gong/">Minglun Gong<sup>4</sup></a></span>
</center>
</td>
<td align=center width=100px>
<center>
<span style="font-size:20px"><a href="https://www.ece.ualberta.ca/~lcheng5/">Li Cheng<sup>1</sup></a></span>
</center>
</td>
</tr>
</table>
<br>
<table align=center width=800px>
<tr>
<td align=center width=110px>
<center>
<span style="font-size:18px"><sup>1</sup>University of Alberta</span></center>
</center>
</td>
<td align=center width=110px>
<center>
<span style="font-size:18px"><sup>2</sup>University of Chicago</span></center>
</center>
</td>
<td align=center width=110px>
<center>
<span style="font-size:18px"><sup>3</sup>Yale University</span></center>
</center>
</td>
<td align=center width=110px>
<center>
<span style="font-size:18px"><sup>4</sup>University of Guelph</span></center>
</center>
</td>
</tr>
</table>
<br>
<table align=center width=500px>
<tr>
<td align=center width=50px>
<center>
<span style="font-size:18px"><a href="#data">[Data]</a></span>
</center>
</td>
<td align=center width=50px>
<center>
<span style="font-size:18px"><a href="https://github.com/EricGuo5513/action-to-motion">[Code]</a></span>
</center>
</td>
<td align=center width=50px>
<center>
<span style="font-size:18px">MultiMedia 2020 <a href="https://arxiv.org/pdf/2007.15240.pdf">[Paper]</a></span>
</center>
</td>
</tr>
</table>
<br>
<table align=center width=900px>
<tr>
<td width=600px>
<center>
<a href="./website/teaser.png"><img src = "./website/teaser.png" height="400px"></img></href></a><br>
</center>
</td>
</tr>
</table>
<br>
<p style="text-align:justify">
Action recognition is a relatively established task: given an input sequence of human motion, the goal is to predict its action category. This paper, on the other hand, considers a relatively new problem, which can be thought of as the inverse of action recognition: given a prescribed action type, we aim to generate plausible human motion sequences in 3D. Importantly, the set of generated motions is expected to be diverse enough to explore the entire action-conditioned motion space, while each sampled sequence should faithfully resemble natural human body articulation dynamics. Motivated by these objectives, we follow the kinematics of the human body by adopting Lie algebra theory to represent natural human motions; we also propose a temporal Variational Auto-Encoder (VAE) that encourages diverse sampling of the motion space. A new 3D human motion dataset, HumanAct12, is also constructed. Empirical experiments over three distinct human motion datasets (including ours) demonstrate the effectiveness of our approach.
</p>
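<p style="text-align:justify">
For readers who prefer code to prose, the sketch below illustrates the general idea of action-conditioned, frame-by-frame sampling from a temporal VAE. It is only an illustrative PyTorch sketch with assumed dimensions and module names (e.g. <i>prior_net</i>, <i>decoder</i>); it is not the released Action2Motion implementation, which is available through the <a href="https://github.com/EricGuo5513/action-to-motion">[Code]</a> link above.
</p>
<pre style="font-size:14px; background-color:#f6f6f6; border-radius:10px; padding:15px;">
# Illustrative sketch only (assumed shapes and module names), not the released implementation:
# sampling a T-frame motion conditioned on a one-hot action label with a recurrent VAE.
import torch
import torch.nn as nn

pose_dim, action_dim, latent_dim, hidden_dim, T = 72, 12, 30, 128, 60

prior_net  = nn.GRUCell(pose_dim + action_dim, hidden_dim)      # recurrent prior over latents
prior_head = nn.Linear(hidden_dim, 2 * latent_dim)              # predicts mean and log-variance
decoder    = nn.GRUCell(latent_dim + pose_dim + action_dim, hidden_dim)
pose_head  = nn.Linear(hidden_dim, pose_dim)                    # predicts the next-frame pose

action = torch.zeros(1, action_dim); action[0, 3] = 1.0         # e.g. the "jump" class
pose   = torch.zeros(1, pose_dim)                               # initial pose
h_pri  = torch.zeros(1, hidden_dim)
h_dec  = torch.zeros(1, hidden_dim)

frames = []
for t in range(T):
    h_pri = prior_net(torch.cat([pose, action], dim=1), h_pri)
    mu, logvar = prior_head(h_pri).chunk(2, dim=1)
    z = mu + torch.randn_like(mu) * (0.5 * logvar).exp()        # reparameterized latent sample
    h_dec = decoder(torch.cat([z, pose, action], dim=1), h_dec)
    pose  = pose_head(h_dec)
    frames.append(pose)

motion = torch.stack(frames, dim=1)                             # shape (1, T, pose_dim)
</pre>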
<hr>
<div id=paper>
<!-- <table align=center width=550px> -->
<table align=center width=1100>
<center><h1>Paper</h1></center>
<tr>
<td width=50px align=left></td>
<td><a href="https://arxiv.org/pdf/2007.15240.pdf"><img style="height:180px" src="./website/paper.png"/></a></td>
<td width=10px align=left></td>
<td><span style="font-size:14pt">Action2Motion: Conditioned Generation of 3D Human Motions<br>
<i>Chuan Guo, Xinxin Zuo, Sen Wang, Shihao Zou, Qingyao Sun, Annan Deng, Minglun Gong, Li Cheng</i><br>
ACM MultiMedia, 2020<br>
<br>
<a href="https://arxiv.org/pdf/2007.15240.pdf">[Paper]</a>
<a href="./website/bibtex.txt">[Bibtex]</a>
</span>
</td>
</tr>
</table>
<br>
<hr>
<center><h1>Demo Video</h1>
<table align=center width=1100px>
<tr height="600px">
<td valign="top" width=1000px>
<center>
<iframe width="1000" height="600" src="https://www.youtube.com/embed/eDzN3mhNdeo" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
</center>
</td>
</tr>
</table>
</center>
<br>
<hr>
</div>
<!-- <table align=center width=550px> -->
<div id='data'>
<center><h1>Data</h1></center>
Our work involves three human motion datasets. First, our own in-house dataset, HumanAct12, is built with both coarse-grained and fine-grained action categories; the other two existing datasets (i.e., NTU-RGBD and CMU Mocap) are adjusted to better fit our purpose. If you use our datasets, we would appreciate a citation of our <a href="./website/bibtex.txt">work</a>.
<table align=center width=1000>
<center><h3>HumanAct12</h3></center>
<tr>
<td width='400px'>
<p style="text-align:justify">
HumanAct12 is adapted from the polarization image and 3D pose dataset <a href="https://jimmyzou.github.io/publication/2020-PHSPDataset">PHSPD</a>, with proper temporal cropping and action annotation. In total, there are 1,191 3D motion clips (90,099 poses) categorized into 12 action classes and 34 fine-grained sub-classes. The action types include daily actions such as <i>walk, run, sit down, jump up, warm up,</i> etc.; the fine-grained action types carry more specific information, such as <i>warm up by bowing left side, warm up by pressing left leg,</i> etc. Detailed statistics of HumanAct12 can be found in our paper or in the documents at the dataset link. Our dataset is also registered with the PHSPD dataset in case extra modalities (e.g., polarization images, depth images, color images) are needed; please see the <i>ReadMe</i> file for detailed registration information. If you are only interested in the 3D joints and action annotations of the human motions, you can download our dataset <a href="here">here</a>. If you require more data modalities, you may consider <a href="https://jimmyzou.github.io/publication/2020-PHSPDataset">PHSPD</a> as well.
</p>
</td>
<br>
</tr>
</table>
<table align=center width=1000>
<center><h3>Refined NTU-RGBD</h3></center>
<tr>
<td width='400px'>
<p style="text-align:justify">
Due to the inaccuracy of the 3D joint annotations in the original NTU-RGBD dataset, we re-estimate the 3D positions of the poses from the point clouds extracted from the raw RGBD videos, using the recently proposed method <a href="https://github.com/mkocabas/VIBE">"Video Inference for Human Body Pose and Shape Estimation" (VIBE, CVPR 2020)</a>. Note this dataset is only a subset of the NTU-RGBD dataset, containing 13 distinct action types (e.g., cheer up, pick up, salute) that constitute 3,902 motion clips. We keep the original name of each motion. Due to the <font color="blue">Release Agreement</font> of the NTU-RGBD dataset, we are not allowed to and will no longer provide access to our re-estimated NTU-RGBD data.
</p>
</td>
<br>
</tr>
</table>
<table align=center width=1000>
<center><h3>CMU Mocap</h3></center>
<tr>
<td width='400px'>
<p style="text-align:justify">
The original CMU Mocap dataset is not organized by action type. Based on the motion descriptions, we identify 8 distinct actions (including running, walking, jumping, and climbing) and manually re-organize 1,088 motions. Here each skeleton is annotated with 20 3D joints (19 bones). In practice, the pose sequences are down-sampled from 100 Hz to 12 Hz; a generic sketch of this down-sampling step is shown below the table. Download <a href="https://drive.google.com/drive/folders/1_2jbZK48Li6sm1duNJnR_eyQjVdJQDoU?usp=sharing">here</a>.
</p>
</td>
<br>
</tr>
</table>
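<p style="text-align:justify">
The snippet below is a generic illustration of the temporal down-sampling step mentioned above: it keeps evenly spaced frames so that a 100 Hz sequence becomes roughly 12 Hz. It assumes an array layout of (frames, joints, 3) and is not the exact preprocessing script used for the released data.
</p>
<pre style="font-size:14px; background-color:#f6f6f6; border-radius:10px; padding:15px;">
# Generic down-sampling sketch (assumed layout: frames x joints x 3), 100 Hz to 12 Hz.
import numpy as np

def downsample(poses, src_hz=100, dst_hz=12):
    n_src = poses.shape[0]
    n_dst = max(1, int(round(n_src * dst_hz / src_hz)))
    idx = np.linspace(0, n_src - 1, n_dst).round().astype(int)   # evenly spaced frame indices
    return poses[idx]

motion = np.random.randn(500, 20, 3)      # e.g. 5 seconds at 100 Hz, 20 joints
motion_12hz = downsample(motion)          # about 60 frames at 12 Hz
print(motion_12hz.shape)                  # (60, 20, 3)
</pre>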
</div>
<br>
<hr>
<div id='code'>
<center><h1>Try Our Code</h1></center>
<table align=center width=1100>
<tr><center>
<a href='https://github.com/EricGuo5513/action-to-motion'><img class="round" style="height:400" src="./website/Architecture.jpg"/></a>
</center></tr>
</table>
<table align=center width=800px>
<tr><center> <br>
<span style="font-size:28px"> <a href='https://github.com/EricGuo5513/action-to-motion'>[GitHub]</a>
<span style="font-size:28px"></a></span>
<br>
</center></tr>
</table>
</div>
<br>
<hr>
<div id='visual-results'>
<center><h1>Visual Results</h1></center>
<table align=center width=1100>
<tr>
<td width="100px">
</td>
<td width="50px">
<h3>Phone</h3>
</td>
<td width="900px">
<center>
<img style="height:300" src="./website/phone2.gif"/>
<img style="height:300" src="./website/phone.gif"/>
<img style="height:300" src="./website/phone3.gif"/>
</center>
</td>
</tr>
</table>
<table align=center width=1100>
<tr>
<td width="100px">
</td>
<td width="50px">
<h3>Throw</h3>
</td>
<td width="900px">
<center>
<img style="height:300" src="./website/throw1.gif"/>
<img style="height:300" src="./website/throw2.gif"/>
<img style="height:300" src="./website/throw3.gif"/>
</center>
</td>
</tr>
</table>
<table align=center width=1100>
<tr>
<td width="100px">
</td>
<td width="50px">
<h3>Jump</h3>
</td>
<td width="900px">
<center>
<img style="height:300" src="./website/jump1.gif"/>
<img style="height:300" src="./website/jump3.gif"/>
<img style="height:300" src="./website/jump2.gif"/>
</center>
</td>
</tr>
</table>
<center><h3>Motions with Different Scales</h3></center>
<table align=center width=1100>
<tr>
<td width="100px">
</td>
<td width="50px">
<h3>Jump</h3>
</td>
<td width="900px">
<center>
<img style="height:300" src="./website/jump_1.gif"/>
<img style="height:300" src="./website/jump_2.gif"/>
<img style="height:300" src="./website/jump_3.gif"/>
</center>
</td>
</tr>
</table>
</div>
<br>
<hr>
<table align=center width=1100px>
<tr>
<td>
<center><h1>Acknowledgements</h1></center>
This work is supported by the University of Alberta Start-up Grant, the NSERC Discovery Grants (including No. RGPIN-2019-04575), and the University of Alberta-Huawei Joint Innovation Collaboration Grants. This webpage template was borrowed from <a href="https://richzhang.github.io/colorization/">here</a>.
</td>
</tr>
</table>
<br><br>
<hr>
<p class='small'>
<br><sup>*</sup>Q. Sun and A. Deng did this project during their internships at the University of Alberta.
</p>
</body>
</html>