<?xml version="1.0"?>
<?xml-stylesheet type="text/css" href="http://index.cslt.org/mediawiki/skins/common/feed.css?303"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="zh-cn">
		<id>http://index.cslt.org/mediawiki/api.php?action=feedcontributions&amp;feedformat=atom&amp;user=Renshipan</id>
		<title>cslt Wiki - 用户贡献 [zh-cn]</title>
		<link rel="self" type="application/atom+xml" href="http://index.cslt.org/mediawiki/api.php?action=feedcontributions&amp;feedformat=atom&amp;user=Renshipan"/>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/%E7%89%B9%E6%AE%8A:%E7%94%A8%E6%88%B7%E8%B4%A1%E7%8C%AE/Renshipan"/>
		<updated>2026-04-17T09:24:07Z</updated>
		<subtitle>用户贡献</subtitle>
		<generator>MediaWiki 1.23.3</generator>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-09-04T07:41:23Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  :  PhD candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  :  MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
* learned lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* That only make English word embedding at encoder constant and train all the other embedding and parameters achieves an even higher bleu score 45.98 and the results are stable.&lt;br /&gt;
* The quality of English embedding at encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper: LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences on this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity, and these two versions of the code have a similar Bleu value&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory (OOM) error occurred when version 0.1 of the code was trained using a large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is overfitting.&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset: zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM (Out Of Memory) error occurred when version 0.1 was trained using a large data set, but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times (just entered the same command), and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(430,000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jiayu Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* check experimental results&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jiayu Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jiayu Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Jiayu Guo || 11:00 || 21:00 || 10 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Jiayu Guo || 11:30 || 21:00 || 9 || &lt;br /&gt;
* learn VV model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Jiayu Guo || 10:00 || 20:00 || 10 || &lt;br /&gt;
* clean up the code&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-09-04T07:40:47Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher bleu score 45.98 and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has lower complexity, and these two versions of the code have similar BLEU values&lt;br /&gt;
* found that the BLEU is still good when the model is overfitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 seconds per step compared to the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the BLEU is still good when the model is overfitting.&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(430,000 pairs), and split the ancient language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jiayu Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* check experimental results&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jiayu Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jiayu Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Jiayu Guo || 11:00 || 21:00 || 10 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Jiayu Guo || 11:30 || 21:00 || 9 || &lt;br /&gt;
* learn VV model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Jiayu Guo || 10:00 || 20:00 || 10 || &lt;br /&gt;
* clean up the code&lt;br /&gt;
|-&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-09-04T07:40:15Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the different training data, counting 22000 and 22017 seperately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or a different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                       end to end. Decoder's smaller vocabulary size compared to encoder's demonstrates&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
* learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant and training all the other embeddings and parameters achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code save more training time, has less complexity and these two version of the code has a similar Bleu value&lt;br /&gt;
* found that the Bleu is still good when the model is overfitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 seconds per step compared to the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is overfitting.&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jiayu Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* check experimental results&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jiayu Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jiayu Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Jiayu Guo || 11:00 || 21:00 || 10 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Jiayu Guo || 11:30 || 21:00 || 9 || &lt;br /&gt;
* learn VV model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Jiayu Guo || 10:00 || 20:00 || 10 || &lt;br /&gt;
* clean up the code&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-09-04T07:37:24Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
* learned lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? Overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* That only make English word embedding at encoder constant and train all the other embedding and parameters achieves an even higher bleu score 45.98 and the results are stable.&lt;br /&gt;
* The quality of English embedding at encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by a encoder and a decoder generate sequences on this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity, and these two versions of the code have a similar Bleu value&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just entered the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed them&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test them &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(430,000 pairs), and split the ancient language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using a single reference and multiple references &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jiayu Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* check experimental results&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jiayu Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jiayu Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Jiayu Guo || 11:00 || 21:00 || 10 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Jiayu Guo || 11:30 || 21:00 || 9 || &lt;br /&gt;
* learn VV model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Jiayu Guo || 10:00 || 20:00 || 10 || &lt;br /&gt;
* clean up the code&lt;br /&gt;
|-&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-09-04T07:36:25Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learn lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher bleu score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has less complexity, and these two versions of the code have similar Bleu values&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is overfitting.&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(430,000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jiayu Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* check experimental results&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jiayu Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jiayu Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Jiayu Guo || 11:00 || 21:00 || 10 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Jiayu Guo || 11:30 || 21:00 || 9 || &lt;br /&gt;
* learn VV model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Jiayu Guo || 10:00 || 20:00 || 10 || &lt;br /&gt;
* cleaned up the code (double-quote characters &amp;quot; were changed to ‘&amp;amp;quot;’)&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-09-04T07:35:50Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or different&lt;br /&gt;
                    data won't influence the 2nd translator's performance; instead, using the same data may&lt;br /&gt;
                     be better, at least judging from the results. But I have to take into account the smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
* learned the lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher bleu score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has less complexity, and these two versions of the code have similar Bleu values&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jiayu Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* check experimental results&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jiayu Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jiayu Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Jiayu Guo || 11:00 || 21:00 || 10 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Jiayu Guo || 11:30 || 21:00 || 9 || &lt;br /&gt;
* learn VV model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Jiayu Guo || 10:00 || 20:00 || 10 || &lt;br /&gt;
* clean up the code (the &amp;quot; characters were changed to ‘&amp;amp;quot;’)&lt;br /&gt;
|-&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4</id>
		<title>NLP Status Report 2017-9-4</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4"/>
				<updated>2017-09-04T02:04:31Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document[http://cslt.riit.tsinghua.edu.cn/mediawiki/index.php/%E6%96%87%E4%BB%B6:VvPoem.docx]&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* checked experimental results and found some small problems,like this [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/9/99/Replacement.docx]&lt;br /&gt;
* wrote the first draft paper of ViVi_NMT [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/8/80/ViVi_NMT.docx]&lt;br /&gt;
* pushed the baseline code,readme file and user manual to cslt github(https://github.com/CSLT-THU/CSLT_NMT)&lt;br /&gt;
||  &lt;br /&gt;
* read some translating phrases papers&lt;br /&gt;
* learn more about the memory mechanism &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* Clean up the code of predict.py&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* configuration environment of TF-0.12 or update the code to adapt TF-1.0&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:Replacement.docx</id>
		<title>文件:Replacement.docx</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:Replacement.docx"/>
				<updated>2017-09-04T02:03:42Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4</id>
		<title>NLP Status Report 2017-9-4</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4"/>
				<updated>2017-09-04T02:00:57Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document[http://cslt.riit.tsinghua.edu.cn/mediawiki/index.php/%E6%96%87%E4%BB%B6:VvPoem.docx]&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* checked experimental results and found some small problems.&lt;br /&gt;
* wrote the first draft paper of ViVi_NMT [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/8/80/ViVi_NMT.docx]&lt;br /&gt;
* pushed the baseline code,readme file and user manual to cslt github(https://github.com/CSLT-THU/CSLT_NMT)&lt;br /&gt;
||  &lt;br /&gt;
* read some translating phrases papers&lt;br /&gt;
* learn more about the memory mechanism &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* Clean up the code of predict.py&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* configuration environment of TF-0.12 or update the code to adapt TF-1.0&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4</id>
		<title>NLP Status Report 2017-9-4</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4"/>
				<updated>2017-09-04T02:00:06Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document[http://cslt.riit.tsinghua.edu.cn/mediawiki/index.php/%E6%96%87%E4%BB%B6:VvPoem.docx]&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* checked experimental results and found some small problems，like follows：&lt;br /&gt;
    sentence before tokenize：&lt;br /&gt;
    &amp;quot;If no more people smoked, this rate would increase to at least 50%,&amp;quot; says André Beaulieu, &lt;br /&gt;
    spokesman for the Canadian Cancer Society.&lt;br /&gt;
    sentence after tokenize：&lt;br /&gt;
    &amp;amp;quot; if no more people smoked , this rate would increase to at least 50 % , &amp;amp;quot; says andré beaulieu , &lt;br /&gt;
    spokesman for the canadian cancer society .&lt;br /&gt;
    It was just an equivalent replacement&lt;br /&gt;
* wrote the first draft paper of ViVi_NMT [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/8/80/ViVi_NMT.docx]&lt;br /&gt;
* pushed the baseline code,readme file and user manual to cslt github(https://github.com/CSLT-THU/CSLT_NMT)&lt;br /&gt;
||  &lt;br /&gt;
* read some translating phrases papers&lt;br /&gt;
* learn more about the memory mechanism &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* Clean up the code of predict.py&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* configuration environment of TF-0.12 or update the code to adapt TF-1.0&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:ViVi_NMT.docx</id>
		<title>文件:ViVi NMT.docx</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:ViVi_NMT.docx"/>
				<updated>2017-09-04T01:58:56Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4</id>
		<title>NLP Status Report 2017-9-4</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4"/>
				<updated>2017-09-04T01:58:38Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document[http://cslt.riit.tsinghua.edu.cn/mediawiki/index.php/%E6%96%87%E4%BB%B6:VvPoem.docx]&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* checked experimental results and found some small problems，like follows：&lt;br /&gt;
    sentence before tokenize：&lt;br /&gt;
    &amp;quot;If no more people smoked, this rate would increase to at least 50%,&amp;quot; says André Beaulieu, &lt;br /&gt;
    spokesman for the Canadian Cancer Society.&lt;br /&gt;
    sentence after tokenize：&lt;br /&gt;
    &amp;amp;quot; if no more people smoked , this rate would increase to at least 50 % , &amp;amp;quot; says andré beaulieu , &lt;br /&gt;
    spokesman for the canadian cancer society .&lt;br /&gt;
    It was just an equivalent replacement&lt;br /&gt;
* wrote the first draft paper of ViVi_NMT  &lt;br /&gt;
* pushed the baseline code,readme file and user manual to cslt github(https://github.com/CSLT-THU/CSLT_NMT)&lt;br /&gt;
||  &lt;br /&gt;
* read some translating phrases papers&lt;br /&gt;
* learn more about the memory mechanism &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* Clean up the code of predict.py&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* configuration environment of TF-0.12 or update the code to adapt TF-1.0&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4</id>
		<title>NLP Status Report 2017-9-4</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4"/>
				<updated>2017-09-04T01:53:02Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document[http://cslt.riit.tsinghua.edu.cn/mediawiki/index.php/%E6%96%87%E4%BB%B6:VvPoem.docx]&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* checked experimental results and found some small problems，like follows：&lt;br /&gt;
    sentence before tokenize：&lt;br /&gt;
    &amp;quot;If no more people smoked, this rate would increase to at least 50%,&amp;quot; says André Beaulieu, &lt;br /&gt;
    spokesman for the Canadian Cancer Society.&lt;br /&gt;
    sentence after tokenize：&lt;br /&gt;
    &amp;amp;quot; if no more people smoked , this rate would increase to at least 50 % , &amp;amp;quot; says andré beaulieu , &lt;br /&gt;
    spokesman for the canadian cancer society .&lt;br /&gt;
    It is just an equivalent replacement&lt;br /&gt;
* wrote the first draft paper of ViVi_NMT  &lt;br /&gt;
* pushed the baseline code,readme file and user manual to cslt github(https://github.com/CSLT-THU/CSLT_NMT)&lt;br /&gt;
||  &lt;br /&gt;
* read some translating phrases papers&lt;br /&gt;
* learn more about the memory mechanism &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* Clean up the code of predict.py&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* configuration environment of TF-0.12 or update the code to adapt TF-1.0&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4</id>
		<title>NLP Status Report 2017-9-4</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4"/>
				<updated>2017-09-04T01:52:07Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document[http://cslt.riit.tsinghua.edu.cn/mediawiki/index.php/%E6%96%87%E4%BB%B6:VvPoem.docx]&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* checked experimental results and found a few small problems，like follows：&lt;br /&gt;
    sentence before tokenize：&lt;br /&gt;
    &amp;quot;If no more people smoked, this rate would increase to at least 50%,&amp;quot; says André Beaulieu, &lt;br /&gt;
    spokesman for the Canadian Cancer Society.&lt;br /&gt;
    sentence after tokenize：&lt;br /&gt;
    &amp;amp;quot; if no more people smoked , this rate would increase to at least 50 % , &amp;amp;quot; says andré beaulieu , &lt;br /&gt;
    spokesman for the canadian cancer society .&lt;br /&gt;
    It is just an equivalent replacement&lt;br /&gt;
* wrote the first draft paper of ViVi_NMT  &lt;br /&gt;
* pushed the baseline code,readme file and user manual to cslt github(https://github.com/CSLT-THU/CSLT_NMT)&lt;br /&gt;
||  &lt;br /&gt;
* read some translating phrases papers&lt;br /&gt;
* learn more about the memory mechanism &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* Clean up the code of predict.py&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* configuration environment of TF-0.12 or update the code to adapt TF-1.0&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4</id>
		<title>NLP Status Report 2017-9-4</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-9-4"/>
				<updated>2017-09-04T01:32:47Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：以“{| class=&amp;quot;wikitable&amp;quot; !Date !! People !! Last Week !! This Week |- | rowspan=&amp;quot;6&amp;quot;|2017/8/14 |Jiyuan Zhang || *code refactoring *wrote a document[http://cslt.riit.tsing...”为内容创建页面&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document[http://cslt.riit.tsinghua.edu.cn/mediawiki/index.php/%E6%96%87%E4%BB%B6:VvPoem.docx]&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* read the released information of other toolkits for nmt&lt;br /&gt;
* cleaned up the code&lt;br /&gt;
* wrote the documents [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/5/5c/Manual_V1.0.docx] [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/3/38/Manual_V0.10.docx]&lt;br /&gt;
||  &lt;br /&gt;
* write the papers of our baseline system&lt;br /&gt;
* read augmented nmt code&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* learn the source code of the mode&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* learn the source code of seq2seq model and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-09-01T02:04:18Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                       end to end. Decoder's smaller vocabulary size compared to encoder's demonstrates&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* That only make English word embedding at encoder constant and train all the other embedding and parameters achieves an even higher bleu score 45.98 and the results are stable.&lt;br /&gt;
* The quality of English embedding at encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity, and these two versions of the code have similar Bleu values&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is overfitting.&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(430,000 pairs), and split the ancient language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jiayu Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* check experimental results&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jiayu Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jiayu Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Jiayu Guo || 11:00 || 21:00 || 10 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Jiayu Guo || 11:30 || 21:00 || 9 || &lt;br /&gt;
* learn VV model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Jiayu Guo || 10:00 || 20:00 || 10 || &lt;br /&gt;
* clean up the code&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-31T13:21:29Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
* learn lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* That only make English word embedding at encoder constant and train all the other embedding and parameters achieves an even higher bleu score 45.98 and the results are stable.&lt;br /&gt;
* The quality of English embedding at encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder and a decoder generates sequences on this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity and these two versions of the code have a similar Bleu value&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test them &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(430,000 pairs), and split the ancient language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jiayu Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* check experimental results&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jiayu Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jiayu Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Jiayu Guo || 11:00 || 21:00 || 10 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Jiayu Guo || 11:30 || 21:00 || 9 || &lt;br /&gt;
* learn VV model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/31&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-31T13:18:46Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator use different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher bleu score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity, and these two versions of the code have similar Bleu values&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jiayu Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jiayu Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jiayu Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* check experimental results&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jiayu Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Jiayu Guo || 8:10 || 21:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/29&lt;br /&gt;
|Jiayu Guo || 11:00 || 21:00 || 10 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/30&lt;br /&gt;
|Jiayu Guo || 11:30 || 21:00 || 9 || &lt;br /&gt;
* learn VV model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/28&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the paper of ViVi_NMT(version 1.0)&lt;br /&gt;
|-&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28</id>
		<title>NLP Status Report 2017-8-28</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28"/>
				<updated>2017-08-28T07:50:59Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document[http://cslt.riit.tsinghua.edu.cn/mediawiki/index.php/%E6%96%87%E4%BB%B6:VvPoem.docx]&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* read the released information of other toolkits for nmt&lt;br /&gt;
* cleaned up the code&lt;br /&gt;
* wrote the documents [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/5/5c/Manual_V1.0.docx] [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/3/38/Manual_V0.10.docx]&lt;br /&gt;
||  &lt;br /&gt;
* write the papers of our baseline system&lt;br /&gt;
* read augmented nmt code&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* learn the source code of the model&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* learn the source code of seq2seq model and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:Manual_V0.10.docx</id>
		<title>文件:Manual V0.10.docx</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:Manual_V0.10.docx"/>
				<updated>2017-08-28T07:50:32Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：user manual of nmt baseline 0.10&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;user manual of nmt baseline 0.10&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:Manual_V1.0.docx</id>
		<title>文件:Manual V1.0.docx</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:Manual_V1.0.docx"/>
				<updated>2017-08-28T07:46:48Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：user manual of nmt baseline system&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;user manual of nmt baseline system&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-28T07:44:30Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*done some work about code refactoring for poem system &lt;br /&gt;
|| &lt;br /&gt;
*plan to complete code refactoring for poem system&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* organized all the experimental results(our baseline system,Moses,THUMT) [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/8/89/Nmt_baseline.xlsx]&lt;br /&gt;
* trained and tested translation models（Toolkit:THUMT ）&lt;br /&gt;
* compared with our system&lt;br /&gt;
||&lt;br /&gt;
* prepare to release the baseline system（tensorflow1.0 version）&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* process data and run model;&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (pairs of sentences / the quality——the modern language text includes context information).&lt;br /&gt;
||  &lt;br /&gt;
*plan to read source code of seq2seq model and learn tensorflow;&lt;br /&gt;
*plan to read a paper named Automatic Long Sentence Segmentation for NMT&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:Nmt_baseline.xlsx</id>
		<title>文件:Nmt baseline.xlsx</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/%E6%96%87%E4%BB%B6:Nmt_baseline.xlsx"/>
				<updated>2017-08-28T07:43:36Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：nmt baseline experimental results&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;nmt baseline experimental results&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28</id>
		<title>NLP Status Report 2017-8-28</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28"/>
				<updated>2017-08-28T06:31:53Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* read the released information of other toolkits for nmt&lt;br /&gt;
* cleaned up the code&lt;br /&gt;
* wrote the documents &lt;br /&gt;
||  &lt;br /&gt;
* write the papers of our baseline system&lt;br /&gt;
* read augmented nmt code&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* process data and run model;&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian (430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian (430,000 pairs), and split the ancient language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (pairs of sentences / the quality——the modern language text includes context information).&lt;br /&gt;
||  &lt;br /&gt;
*plan to read source code of seq2seq model and learn tensorflow;&lt;br /&gt;
*plan to read a paper named Automatic Long Sentence Segmentation for NMT&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
|zhangshuai &lt;br /&gt;
||  &lt;br /&gt;
* learn model source code&lt;br /&gt;
||  &lt;br /&gt;
* learn tensorflow and source code&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-28T06:28:36Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or a different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* That only make English word embedding at encoder constant and train all the other embedding and parameters achieves an even higher bleu score 45.98 and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by a encoder and a decoder generate sequences on this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity, and these two versions of the code have a similar Bleu value&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory (OOM) error occurred when version 0.1 of the code was trained using the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jia Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jia Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jia Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jia Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jia Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-28T06:25:55Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found that version 1.0 of the code saves more training time, has lower complexity, and that the two versions of the code have similar Bleu values&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* an out of memory (OOM) error occurred when version 0.1 of the code was trained using the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* an OOM (Out Of Memory) error occurred when version 0.1 was trained using the large data set, but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times (just entering the same command), and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 9:00|| 20:00 || 11 ||&lt;br /&gt;
* test results.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jia Guo || 9:30 || 21:30 || 12 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Jia Guo || 9:10 || 23:00 || 9.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/22&lt;br /&gt;
|Jia Guo || 9:00 || 22:00 || 12 || &lt;br /&gt;
* read the source code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/23&lt;br /&gt;
|Jia Guo || 9:00 || 22:00 || 11 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* wrote the documents&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/24&lt;br /&gt;
|Jia Guo || 9:10 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/25&lt;br /&gt;
|Jia Guo || 8:50 || 22:00 || 10.5 || &lt;br /&gt;
* read the source code and learn tensorflow&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28</id>
		<title>NLP Status Report 2017-8-28</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-28"/>
				<updated>2017-08-28T06:22:16Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*code refactoring&lt;br /&gt;
*wrote a document&lt;br /&gt;
|| &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren &lt;br /&gt;
||  &lt;br /&gt;
* wrote the documents &lt;br /&gt;
* cleaned up the code&lt;br /&gt;
||  &lt;br /&gt;
* write the papers of our baseline system&lt;br /&gt;
* read augmented nmt code&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* process data and run model;&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (the number of sentence pairs / its quality — the modern language text includes context information).&lt;br /&gt;
||  &lt;br /&gt;
*plan to read source code of seq2seq model and learn tensorflow;&lt;br /&gt;
*plan to read a paper named Automatic Long Sentence Segmentation for NMT&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
|zhangshuai &lt;br /&gt;
||  &lt;br /&gt;
* learn model source code&lt;br /&gt;
||  &lt;br /&gt;
* learn tensorflow and source code&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T11:40:05Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator use different training data, counting 22000 and 22017 respectively&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or a different&lt;br /&gt;
                    one won't influence the 2nd translator's performance; instead, using the same one may&lt;br /&gt;
                      be better, at least judging from the results. But I have to take into consideration the smaller size &lt;br /&gt;
                      of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
* learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* That only make English word embedding at encoder constant and train all the other embedding and parameters achieves an even higher bleu score 45.98 and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better BLEU than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity, and these two versions of the code have similar BLEU values&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
*The main factors now are the data (including the number of sentence pairs and their quality), because the modern language text includes context information.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(430,000 pairs), and split the ancient language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T11:39:33Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
* learned lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* That only make English word embedding at encoder constant and train all the other embedding and parameters achieves an even higher bleu score 45.98 and the results are stable.&lt;br /&gt;
* The quality of English embedding at encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences on this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity, and these two versions of the code have a similar Bleu value&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM (Out Of Memory) error occurred when version 0.1 was trained using a large data set, but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times (just entering the same command), and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
*The main factors now are the data (including the number of sentence pairs and their quality), because the modern language text includes context information.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(430,000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/21&lt;br /&gt;
|Shipan Ren || 10:00 || 22:00 || 12 || &lt;br /&gt;
* read the released information of other translation systems&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T11:29:16Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the different training data, counting 22000 and 22017 seperately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity, and these two versions of the code have a similar Bleu value&lt;br /&gt;
* found that the Bleu is still good when the model is overfitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory (OOM) error occurred when version 0.1 of code was trained using large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is overfitting.&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 framework leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
*The main factor now is the data (including the number of sentence pairs and their quality) — because the modern language text includes context information.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/16&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 10||&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(430,000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T11:22:00Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the different training data, counting 22000 and 22017 seperately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has less complexity, and these two versions of the code have a similar BLEU value&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the BLEU is still good when the model is overfitting.&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test it &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (both the number of sentence pairs and their quality), because the modern language text includes context information.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/15&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/17&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using single reference and  multiple reference &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-21T05:16:41Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*done some work about code refactoring for poem system &lt;br /&gt;
|| &lt;br /&gt;
*plan to complete code refactoring for poem system&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* organized all the experimental results(our baseline system,Moses,THUMT) &lt;br /&gt;
* trained and tested translation models（Toolkit:THUMT ）&lt;br /&gt;
* compared with our system&lt;br /&gt;
||&lt;br /&gt;
* prepare to release the baseline system（tensorflow1.0 version）&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* process data and run model;&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* read source code of seq2seq model;&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (including the number of sentence pairs and their quality — because the modern language text includes context information).&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21</id>
		<title>NLP Status Report 2017-8-21</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-21"/>
				<updated>2017-08-21T05:09:46Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*done work about code refactoring for poem system &lt;br /&gt;
|| &lt;br /&gt;
*plan to complete code refactoring for poem system&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* organized all the experimental results(our baseline system,Moses,THUMT) &lt;br /&gt;
* train translation models by using THUMT &lt;br /&gt;
* test the bleu of these models&lt;br /&gt;
* compare with our system&lt;br /&gt;
||&lt;br /&gt;
* prepare to release the baseline system（tensorflow1.0 version）&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
* process data and run model;&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
||  &lt;br /&gt;
* read source code of seq2seq model;&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (including the number of sentence pairs and their quality — because the modern language text includes context information).&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T03:21:28Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator use different training data, comprising 22000 and 22017 sentences respectively&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
* learned LSTM model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* That only make English word embedding at encoder constant and train all the other embedding and parameters achieves an even higher bleu score 45.98 and the results are stable.&lt;br /&gt;
* The quality of English embedding at encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by a encoder and a decoder generate sequences on this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity, and the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-de &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-de dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset:WMT2014 en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read papers about memory-augmented nmt&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document &lt;br /&gt;
|&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read memory-augmented nmt code &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* looked for the performance(the bleu value) of other seq2seq models&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* learn moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* installed and built Moses on the server &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation model and test it&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if moses can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* toolkit: Moses&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train statistical machine translation models and test them &lt;br /&gt;
* dataset:zh-en big,WMT2014 en-de,WMT2014 en-fr&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* collate experimental results&lt;br /&gt;
* compare our baseline model with Moses &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read paper about THUMT&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* read THUMT manual and learn how to use it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(430,000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (including the number of sentence pairs and their quality — because the modern language text includes context information).&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en small&lt;br /&gt;
* test if THUMT can work normally&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* code automation scripts to process data,train model and test model &lt;br /&gt;
* train translation models and test them&lt;br /&gt;
* toolkit: THUMT&lt;br /&gt;
* dataset:zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/19&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* test translation models by using a single reference and multiple references &lt;br /&gt;
* organize all the experimental results(our baseline system,Moses,THUMT)&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/20&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T02:24:07Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in a fixed-length vector by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has lower complexity, and the two versions of the code have similar BLEU values&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
* (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using the tf1.0 baseline and the tf0.1 baseline respectively&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document&lt;br /&gt;
 &lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian (430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian (430,000 pairs), and split the ancient language text character by character, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (including the number of sentence pairs and their quality — since the modern language text includes context information).&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/19&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/20&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T02:21:48Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translators use different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data as the 1st translator or a different&lt;br /&gt;
                    one won't influence the 2nd translator's performance; instead, using the same one may&lt;br /&gt;
                     be better, at least judging from the results. But I have to take into account the smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned the LSTM model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher BLEU score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity, and these two versions of the code have a similar Bleu value&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory (OOM) error occurred when version 0.1 of code was trained using large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
(reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM (Out Of Memory) error occurred when version 0.1 was trained using large data set, but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times (just entering the same command), and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
* datasets:WMT2014 en-de and en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document&lt;br /&gt;
 &lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (including the number of sentence pairs and their quality — since the modern language text includes context information).&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/19&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/20&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T02:19:48Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator use different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learn the lstm model and the seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very badly&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very badly is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* That only make English word embedding at encoder constant and train all the other embedding and parameters achieves an even higher bleu score 45.98 and the results are stable.&lt;br /&gt;
* The quality of English embedding at encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity, and these two versions of the code have a similar Bleu value&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
dataset：zh-en small&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
(reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/12&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
dataset：zh-en big&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/13&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
    reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
* I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/14&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/17&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* downloaded the wmt2014 data sets and processed it&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* processed data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
datasets:WMT2014 en-de and en-fr datasets&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document&lt;br /&gt;
 &lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (including the number of sentence pairs and their quality — since the modern language text includes context information).&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/19&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/20&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T02:02:12Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
* learned lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher bleu score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of English embedding at encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time，has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time and has less complexity, and these two versions of the code have a similar Bleu value&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory（OOM） error occurred when version 0.1 of code was trained using large data set，but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 second per step than the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* trained translation models using tf1.0 baseline and tf0.1 baseline respectively&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* tested these checkpoints&lt;br /&gt;
* found the new version takes less time &lt;br /&gt;
* found these two versions have similar complexity and bleu values &lt;br /&gt;
* found that the bleu is still good when the model is over fitting .&lt;br /&gt;
(reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document&lt;br /&gt;
 &lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(43,0000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(43,0000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (including the number of sentence pairs and their quality — because the modern language text includes context information).&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/19&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/20&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/Schedule</id>
		<title>Schedule</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/Schedule"/>
				<updated>2017-08-21T01:53:45Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：/* Daily Report */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;=NLP Schedule=&lt;br /&gt;
&lt;br /&gt;
==Members==&lt;br /&gt;
&lt;br /&gt;
===Current Members===&lt;br /&gt;
&lt;br /&gt;
* Yang Feng (冯洋)&lt;br /&gt;
* Jiyuan Zhang （张记袁）&lt;br /&gt;
* Aodong Li (李傲冬)&lt;br /&gt;
* Andi Zhang (张安迪)&lt;br /&gt;
* Shiyue Zhang (张诗悦)&lt;br /&gt;
* Li Gu (古丽)&lt;br /&gt;
* Peilun Xiao (肖培伦)&lt;br /&gt;
* Shipan Ren (任师攀)&lt;br /&gt;
* Jiayu Guo (郭佳雨)&lt;br /&gt;
&lt;br /&gt;
===Former Members===&lt;br /&gt;
* '''Chao Xing (邢超)'''     :  FreeNeb&lt;br /&gt;
* '''Rong Liu (刘荣)'''      :  优酷&lt;br /&gt;
* '''Xiaoxi Wang (王晓曦)''' :  图灵机器人&lt;br /&gt;
* '''Xi Ma (马习)'''         :  清华大学研究生&lt;br /&gt;
* '''Tianyi Luo (骆天一)'''  ： phd candidate in University of California Santa Cruz&lt;br /&gt;
* '''Qixin Wang (王琪鑫)'''  :  MA candidate in University of California&lt;br /&gt;
* '''DongXu Zhang (张东旭)''': --&lt;br /&gt;
* '''Yiqiao Pan (潘一桥)'''  ： MA candidate in University of Sydney &lt;br /&gt;
* '''Shiyao Li （李诗瑶）''' :  BUPT&lt;br /&gt;
* '''Aiting Liu (刘艾婷)'''  :  BUPT&lt;br /&gt;
&lt;br /&gt;
==Work Progress==&lt;br /&gt;
===Daily Report===&lt;br /&gt;
&lt;br /&gt;
{|class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Person  !! start!! leave !! hours ||status&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/02&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/03&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/04&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/05&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/06&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/07&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/08&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/09&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/10&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/11&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/12&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/13&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/14&lt;br /&gt;
|Andy Zhang||9:30 ||18:30 ||8 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/04/15&lt;br /&gt;
|Andy Zhang||9:00 ||15:00 ||6 || &lt;br /&gt;
*preparing EMNLP&lt;br /&gt;
|-&lt;br /&gt;
|Peilun Xiao || || || ||&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/18&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Pick up new task in news generation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/19&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/20&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/21&lt;br /&gt;
|Aodong Li||12:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/24&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Adjust literature review focus&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/25&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/26&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/27&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Try to reproduce sc-lstm work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/28&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Transfer to new task in machine translation and do literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/04/30&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/01&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/02&lt;br /&gt;
|Aodong Li||11:00 ||20:00 ||8 || &lt;br /&gt;
*Literature review and code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/06&lt;br /&gt;
|Aodong Li||14:20 ||17:20||3 || &lt;br /&gt;
*Code review&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/07&lt;br /&gt;
|Aodong Li||13:30 ||22:00||8 || &lt;br /&gt;
*Code review and experiment started, but version discrepancy encountered&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/08&lt;br /&gt;
|Aodong Li||11:30 ||21:00 ||8 || &lt;br /&gt;
*Code review and version discrepancy solved&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/09&lt;br /&gt;
|Aodong Li||13:00 ||22:00 ||9 || &lt;br /&gt;
*Code review and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 42.56&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
*Entry procedures&lt;br /&gt;
*Machine Translation paper reading&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/10&lt;br /&gt;
|Aodong Li || 13:30 || 22:00 || 8 || &lt;br /&gt;
*experiment setting: &lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator use different training data, counting 22000 and 22017 separately&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 36.67 (36.67 is the model at 4750 updates, but we use model at 3000 updates to&lt;br /&gt;
                     prevent the case of overfitting, to generate the 2nd translator's training data, for &lt;br /&gt;
                     which the BLEU is 34.96)&lt;br /&gt;
  best result of our model: 29.81&lt;br /&gt;
  This may suggest that using either the same training data with 1st translator or different&lt;br /&gt;
                    one won't influence 2nd translator's performance, instead, using the same one may&lt;br /&gt;
                     be better, at least from results. But I have to give a consideration of a smaller size &lt;br /&gt;
                     of training data compared to yesterday's model.&lt;br /&gt;
*code 2nd translator with constant embedding&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*Configure environment &lt;br /&gt;
*Run tf_translate code&lt;br /&gt;
*Read Machine Translation paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/11&lt;br /&gt;
|Aodong Li || 13:00 ||  21:00|| 8 || &lt;br /&gt;
*experiment setting:&lt;br /&gt;
  small data, &lt;br /&gt;
  1st and 2nd translator uses the same training data, &lt;br /&gt;
  2nd translator uses '''constant untrainable embedding''' imported from 1st translator's decoder&lt;br /&gt;
*results (BLEU):&lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.48&lt;br /&gt;
  Experiments show that this kind of series or cascade model will definitely impair the final perfor-&lt;br /&gt;
                      mance due to information loss as the information flows through the network from &lt;br /&gt;
                      end to end. Decoder's smaller vocabulary size compared to encoder's demonstrate&lt;br /&gt;
                      this (9000+ -&amp;gt; 6000+).&lt;br /&gt;
  The intention of this experiment is looking for a map to solve meaning shift using 2nd translator,&lt;br /&gt;
                      but result of whether the map is learned or not is obscured by the smaller vocab size &lt;br /&gt;
                      phenomenon.&lt;br /&gt;
*literature review on hierarchical machine translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/12&lt;br /&gt;
|Aodong Li||13:00 ||21:00 ||8 || &lt;br /&gt;
*Code double decoding model and read multilingual MT paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/13&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
*read machine translation paper &lt;br /&gt;
*learned lstm model and seq2seq model &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/14&lt;br /&gt;
|Aodong Li || 10:00 || 20:00 || 9 || &lt;br /&gt;
*Code double decoding model and experiment&lt;br /&gt;
*details about experiment: &lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: 43.53&lt;br /&gt;
*NEXT: 2nd translator uses '''trained constant embedding'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/15&lt;br /&gt;
|Shipan Ren || 9:30 || 19:00 || 9.5 || &lt;br /&gt;
* understand the difference between lstm model and gru model&lt;br /&gt;
* read the implement code of seq2seq model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/17&lt;br /&gt;
|Shipan Ren || 9:30 || 19:30 || 10 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:30 || 24:00 || 9|| &lt;br /&gt;
* code and debug double-decoder model&lt;br /&gt;
* alter 2017/05/14 model's size and will try after nips&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/18&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* read neural machine translation paper&lt;br /&gt;
* read tf_translate code&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:30 || 21:00 || 8 || &lt;br /&gt;
* train double-decoder model on small data set but encounter decode bugs&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/19&lt;br /&gt;
|Aodong Li || 12:30 || 20:30 || 8 || &lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
* the model performs well on develop set, but performs badly on test data. I want to figure out the reason.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/21&lt;br /&gt;
|Aodong Li || 10:30 || 18:30 || 8 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 700 (500 in prior)&lt;br /&gt;
  emb_size = 510 (310 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.21'''&lt;br /&gt;
  But only one checkpoint outperforms the baseline, the other results are commonly under 43.1&lt;br /&gt;
* debug double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/22&lt;br /&gt;
|Aodong Li || 14:00 || 22:00 || 8 || &lt;br /&gt;
*double-decoder without joint loss generalizes very bad&lt;br /&gt;
*i'm trying double-decoder model with joint loss&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/23&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 700&lt;br /&gt;
  emb_size = 510&lt;br /&gt;
  learning_rate = 0.0005 (0.001 in prior)&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data the concat(Chinese, machine translated English), &lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.19'''&lt;br /&gt;
  Overfitting? In overall, the 2nd translator performs worse than baseline&lt;br /&gt;
*details about experiment 2: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  double-decoder model with joint loss which means the final loss  = 1st decoder's loss + 2nd &lt;br /&gt;
  decoder's loss&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''39.04'''&lt;br /&gt;
  The 1st decoder's output is generally better than 2nd decoder's output. The reason may be that &lt;br /&gt;
  the second decoder only learns from the first decoder's hidden states because their states are &lt;br /&gt;
  almost the same.&lt;br /&gt;
*DISCOVERY: &lt;br /&gt;
  The reason why double-decoder without joint loss generalizes very bad is that the gap between&lt;br /&gt;
  force teaching mechanism (training process) and beam search mechanism (decoding process)&lt;br /&gt;
  propagates and expands the error to the output end, which destroys the model when decoding.&lt;br /&gt;
*next:&lt;br /&gt;
  Try to train double-decoder model without joint loss but with beam search on 1st decoder.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Aodong Li || 13:00 || 21:30 || 8 || &lt;br /&gt;
*code double-attention one-decoder model&lt;br /&gt;
*code double-decoder model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/24&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/25&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
*read neural machine translation paper &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* code and debug double attention model&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/27&lt;br /&gt;
|Shipan Ren || 9:30 || 18:30 || 9 || &lt;br /&gt;
*read tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/28&lt;br /&gt;
|Aodong Li || 15:00 || 22:00 || 7 || &lt;br /&gt;
*details about experiment: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  when decoding:&lt;br /&gt;
    final_attn = attn_1 + attn_2 best result of our model: '''43.50'''&lt;br /&gt;
    final_attn = 2/3attn_1 + 4/3attn_2 best result of our model: '''41.22'''&lt;br /&gt;
    final_attn = 4/3attn_1 + 2/3attn_2 best result of our model: '''43.58'''&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/05/30&lt;br /&gt;
|Aodong Li || 15:00 || 21:00 || 6 || &lt;br /&gt;
*details about experiment 1: &lt;br /&gt;
  hidden_size = 500&lt;br /&gt;
  emb_size = 310&lt;br /&gt;
  learning_rate = 0.001&lt;br /&gt;
  small data, &lt;br /&gt;
  2nd translator uses as training data both Chinese and machine translated English&lt;br /&gt;
  Chinese and English use different encoders and different attention&lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''random initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''42.36'''&lt;br /&gt;
* details about experiment 2: &lt;br /&gt;
  '''final_attn = 2/3attn_1 + 4/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.32'''&lt;br /&gt;
* details about experiment 3: &lt;br /&gt;
  '''final_attn = attn_1 + attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.41''' and it seems more stable&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/05/31&lt;br /&gt;
|Shipan Ren || 10:00 || 19:30 || 9.5 || &lt;br /&gt;
*run and test tf_translate code &lt;br /&gt;
*write document of tf_translate project &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 12:00 || 20:30 || 8.5 || &lt;br /&gt;
* details about experiment 1: &lt;br /&gt;
  '''final_attn = 4/3attn_1 + 2/3attn_2'''&lt;br /&gt;
  2nd translator uses '''constant initialized embedding'''&lt;br /&gt;
*results (BLEU): &lt;br /&gt;
  BASELINE: 43.87&lt;br /&gt;
  best result of our model: '''45.79'''&lt;br /&gt;
* Making only the English word embedding at the encoder constant, while training all the other embeddings and parameters, achieves an even higher bleu score of 45.98, and the results are stable.&lt;br /&gt;
* The quality of the English embedding at the encoder plays a pivotal role in this model.&lt;br /&gt;
* Preparation of big data. &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/01&lt;br /&gt;
|Aodong Li || 13:00 || 24:00 || 11 || &lt;br /&gt;
* Only make the English encoder's embedding constant -- 45.98&lt;br /&gt;
* Only initialize the English encoder's embedding and then finetune it -- 46.06&lt;br /&gt;
* Share the attention mechanism and then directly add them -- 46.20&lt;br /&gt;
* Run double-attention model on large data&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/02&lt;br /&gt;
|Aodong Li || 13:00 || 22:00 || 9 || &lt;br /&gt;
* Baseline bleu on large data is 30.83 with '''30000''' output vocab&lt;br /&gt;
* Our best result is 31.53 with '''20000''' output vocab&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/03&lt;br /&gt;
|Aodong Li || 13:00 || 21:00 || 8 || &lt;br /&gt;
* Train the model with 40 batch size and with concat(attn_1, attn_2)&lt;br /&gt;
* the best result of model with 40 batch size and with add(attn_1, attn_2) is 30.52&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/05&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/06&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/07&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/08&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/09&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/12&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/13&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/14&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/15&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/16&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Prepare for APSIPA paper&lt;br /&gt;
* Read paper about MT involving grammar&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/19&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Completed APSIPA paper&lt;br /&gt;
* Took new task in style translation&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/20&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried synonyms substitution&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/21&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried post edit like synonyms substitution but this didn't work&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/06/22&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/23&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read neural machine translation paper &lt;br /&gt;
* read and run tf_translate code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained a GRU language model to determine similar word&lt;br /&gt;
* This didn't work because semantics is not captured&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/26&lt;br /&gt;
|Shipan Ren || 10:00 || 21:00 || 11 || &lt;br /&gt;
* read paper：LSTM Neural Networks for Language Modeling&lt;br /&gt;
* read and run ViVi_NMT code &lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Tried to figure out new ways to change the text style&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/27&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Trained seq2seq model to solve this problem&lt;br /&gt;
* Semantics are stored in fixed-length vectors by an encoder, and a decoder generates sequences from this vector&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/28&lt;br /&gt;
|Shipan Ren || 10:00 || 19:00 || 9 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* installed tensorflow0.1 and tensorflow1.0 on my pc and debugged ViVi_NMT&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Cross-domain seq2seq w/o attention and w/ attention models didn't work because of overfitting&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/29&lt;br /&gt;
|Shipan Ren || 10:00 || 20:00 || 10 || &lt;br /&gt;
* read the API of tensorflow&lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;2&amp;quot;|2017/06/30&lt;br /&gt;
|Shipan Ren || 10:00 || 24:00 || 14 || &lt;br /&gt;
* debugged ViVi_NMT and tried to upgrade code version to tensorflow1.0 (on server)&lt;br /&gt;
* accomplished this task &lt;br /&gt;
* found the new version saves more time, has lower complexity and better bleu than before&lt;br /&gt;
|-&lt;br /&gt;
|Aodong Li || 10:00 || 19:00 || 8 || &lt;br /&gt;
* Read style transfer papers&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/03&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)&lt;br /&gt;
* tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/04&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* recorded experimental results&lt;br /&gt;
* found version 1.0 of the code saves more training time, has less complexity, and these two versions of the code have similar Bleu values&lt;br /&gt;
* found that the Bleu is still good when the model is over fitting&lt;br /&gt;
* reason: the test set and training set are similar in content and style on small data set&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/05&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English)&lt;br /&gt;
* read NMT papers&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/06&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* out of memory (OOM) error occurred when version 0.1 of the code was trained using the large data set, but version 1.0 worked&lt;br /&gt;
* reason: improper distribution of resources by the tensorflow0.1 version leads to exhaustion of memory resources&lt;br /&gt;
* I've tried many times, and version 0.1 worked&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/07&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* tested these checkpoints and recorded experimental results&lt;br /&gt;
* the version 1.0 code saved 0.06 seconds per step compared to the version 0.1 code&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/08&lt;br /&gt;
|Shipan Ren || 9:00 || 21:00 || 12 || &lt;br /&gt;
* downloaded the wmt2014 data set&lt;br /&gt;
* used the English-French data set to run the code and found the translation is not good&lt;br /&gt;
* reason:no data preprocessing is done&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/10&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English) and tested these checkpoint&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/11&lt;br /&gt;
|Shipan Ren || 9:00 || 20:00 || 11 || &lt;br /&gt;
* &lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/18&lt;br /&gt;
|Jiayu Guo || 8:30|| 22:00 || 14 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/19&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of bleu.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/20&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read papers of attention mechanism.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/21&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/24&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* read model code.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/25&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/26&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/27&lt;br /&gt;
|Jiayu Guo || 10:00|| 24:00 || 14 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/28&lt;br /&gt;
|Jiayu Guo || 9:00|| 24:00 || 15 ||&lt;br /&gt;
* process document&lt;br /&gt;
 &lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/07/31&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* split ancient language text to single word&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/1&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run seq2seq_model&lt;br /&gt;
|&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/2&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/3&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/4&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* search new data(Songshu)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/7&lt;br /&gt;
|Jiayu Guo || 9:00|| 22:00 || 13 ||&lt;br /&gt;
* process document&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/8&lt;br /&gt;
|Jiayu Guo || 10:00|| 21:00 || 11 ||&lt;br /&gt;
* read tensorflow &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/9&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* run model with the data of which ancient content was split by single character.&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/10&lt;br /&gt;
|Jiayu Guo || 9:00|| 23:00 || 13 ||&lt;br /&gt;
* process data of Songshu&lt;br /&gt;
* read papers of CNN &lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/11&lt;br /&gt;
|Jiayu Guo || 10:00|| 23:00 || 13 ||&lt;br /&gt;
* learn about Graphic Model of LSTM-Projected BPTT&lt;br /&gt;
* search for data available for translation (Twenty-four-Shi)&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/12&lt;br /&gt;
|Jiayu Guo || 11:00|| 23:30 || 12 ||&lt;br /&gt;
* run model with data including Shiji、Zizhitongjian.&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/13&lt;br /&gt;
|Jiayu Guo || 13:00||  ||  ||&lt;br /&gt;
* test results.&lt;br /&gt;
checkpoint-100000 translation model&lt;br /&gt;
BLEU： 11.11&lt;br /&gt;
&lt;br /&gt;
*source:在秦者名错，与张仪争论,於是惠王使错将伐蜀，遂拔，因而守之。&lt;br /&gt;
*target:在秦国的名叫司马错，曾与张仪发生争论，秦惠王采纳了他的意见，于是司马错率军攻蜀国，攻取后，又让他做了蜀地郡守。&lt;br /&gt;
*trans：当时秦国的人都很欣赏他的建议，与张仪一起商议，所以吴王派使者率军攻打蜀地，一举攻，接着又下令守城 。&lt;br /&gt;
*source:神大用则竭，形大劳则敝，形神离则死 。 &lt;br /&gt;
*target:精神过度使用就会衰竭，形体过度劳累就会疲惫，神形分离就会死亡。 &lt;br /&gt;
*trans: 精神过度就可衰竭,身体过度劳累就会疲惫，地形也就会死。&lt;br /&gt;
*source:今天子接千岁之统，封泰山，而余不得从行，是命也夫，命也夫！&lt;br /&gt;
*target:现天子继承汉朝千年一统的大业，在泰山举行封禅典礼而我不能随行，这是命啊，是命啊！ &lt;br /&gt;
*trans: 现在天子可以继承帝位的成就爵位，爵位至泰山，而我却未能执行先帝的命运。&lt;br /&gt;
&lt;br /&gt;
*1.data used Zizhitongjian only(6,000 pairs), we can get BLEU 6 at most.&lt;br /&gt;
*2.data used Zizhitongjian only(12,000 pairs), we can get BLEU 7 at most.&lt;br /&gt;
*3.data used Shiji and Zizhitongjian(430,000 pairs), we can get BLEU about 9.&lt;br /&gt;
*4.data used Shiji and Zizhitongjian(430,000 pairs), and split the ancient language text one character by one, we can get BLEU 11.11 at most.&lt;br /&gt;
*The main factor now is the data (including the number of sentence pairs and their quality — because the modern language text includes context information).&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/19&lt;br /&gt;
|Jiayu Guo || 13:00|| 23:00 || 10 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;1&amp;quot;|2017/08/20&lt;br /&gt;
|Jiayu Guo || 13:00|| 22:00 || 9 ||&lt;br /&gt;
* read source code.&lt;br /&gt;
|-&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
===Time Off Table===&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! Date !! Yang Feng !! Jiyuan Zhang &lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
==Past progress==&lt;br /&gt;
[[nlp-progress 2017/03]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/02]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2017/01]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/12]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/11]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/10]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/09]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/08]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/05/01 -- 08/16 | nlp-progress 2016/05-07]]&lt;br /&gt;
&lt;br /&gt;
[[nlp-progress 2016/04]]&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-14</id>
		<title>NLP Status Report 2017-8-14</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-14"/>
				<updated>2017-08-21T00:53:42Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*polished the couplet model &lt;br /&gt;
|| &lt;br /&gt;
Code refactoring for poem system&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* organized the results of the experiment&lt;br /&gt;
* learned how to use THUMT and how did it work&lt;br /&gt;
||&lt;br /&gt;
* train translation models by using THUMT&lt;br /&gt;
* test the bleu of these models&lt;br /&gt;
* compare with our system&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-7-31</id>
		<title>NLP Status Report 2017-7-31</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-7-31"/>
				<updated>2017-08-21T00:51:08Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/7/31&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*made the poster for ACL [http://cslt.riit.tsinghua.edu.cn/mediawiki/images/9/95/Acl2017-poster.pdf]&lt;br /&gt;
*attempted to fix repeated word, but failed&lt;br /&gt;
*done some work of n-gram model of the couplet&lt;br /&gt;
|| &lt;br /&gt;
*generate streame according to a couplet&lt;br /&gt;
*complete the task of filling in the blanks of a couplet&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
* Got 55,000+ English poems and 260,000+ lines after preprocessing&lt;br /&gt;
* Added phase separators as the style indicator, and every line has at least one separator&lt;br /&gt;
* Training loss didn't decrease very much, only from 440 to 50&lt;br /&gt;
* The translation quality deteriorated when added language model&lt;br /&gt;
||&lt;br /&gt;
* Try to use a larger language model to decrease the training loss&lt;br /&gt;
* Try to use character-based MT in English-Chinese translation&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* looked for the performance(the bleu value) of other models &lt;br /&gt;
  on the WMT2014 dataset from the published papers,but not found.&lt;br /&gt;
* installed and built Moses on the server   &lt;br /&gt;
||&lt;br /&gt;
* train statistical machine translation model and test it &lt;br /&gt;
  toolkit: Moses&lt;br /&gt;
  data sets:WMT2014 en-de、en-fr data sets&lt;br /&gt;
* collate experimental results.compare our baseline model with Moses &lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
*process document. Until now, Shiji has been split up to 24,000 pairs of sentences.&lt;br /&gt;
*Zizhitongjian has been split up to 16,000 pairs.&lt;br /&gt;
||&lt;br /&gt;
*adjust jieba source code, in order to make jieba more accurate for ancient language wordpiece&lt;br /&gt;
*read model source code&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-7-24</id>
		<title>NLP Status Report 2017-7-24</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-7-24"/>
				<updated>2017-08-21T00:50:49Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/7/24&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*&lt;br /&gt;
|| &lt;br /&gt;
*make the poster for ACL&lt;br /&gt;
*complete neural model for the couplet&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
* Completed the shallow fusion of news-domain translation with dialog-domain style.&lt;br /&gt;
* The style was not obvious since the dialog dataset has no specific style indicator.&lt;br /&gt;
* Some examples:&lt;br /&gt;
  全程 预计 00 天 , 团费 大约 0.0万 元 人民币 。&lt;br /&gt;
  w/ style: it is estimated that 00 days of the entire project will be about 00 million yuan .&lt;br /&gt;
  w/o style: the whole world is expected to be about 00 days , with a total of 00,000 yuan rmb .&lt;br /&gt;
&lt;br /&gt;
  在 美国 九一一 恐怖 攻击 周年 左右 , 东南亚 各 地 的 西方 外交 使节 团 纷纷 关闭 , 因为 &lt;br /&gt;
  它们 遭到 与 欧萨玛 . 宾 拉登 的 盖 达 组织 及 其 地方 联盟 有关 的 威胁 。&lt;br /&gt;
  w/ style: on the anniversary of the sept 0 terrorist attack , the western dpp diplomatic &lt;br /&gt;
  envoys in southeast asia were shut down because they were with the threat to al qaeda&lt;br /&gt;
  bin laden and al - qaeda 's relevant alliance .&lt;br /&gt;
  w/o style: on the anniversary of the sept 0 terrorist attack , the western dpp diplomatic envoys&lt;br /&gt;
  in southeast asia were shut off because they were closely connected with osama bin laden 's al &lt;br /&gt;
  qaeda and al - qaeda 's relevant alliances .&lt;br /&gt;
||&lt;br /&gt;
* Find the dataset with obvious style indicators.&lt;br /&gt;
* Try to quantify the result to determine if it is effective.&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* trained two models of the baseline using WMT2014 en-fr datasets&lt;br /&gt;
  under training &lt;br /&gt;
  new version saved more time&lt;br /&gt;
&lt;br /&gt;
* read some papers（memory-augmented-nmt and Memory augmented Chinese-Uyghur Neural Machine Translation）   &lt;br /&gt;
||&lt;br /&gt;
* read memory-augmented-nmt code&lt;br /&gt;
* read papers about memory augmented NMT &lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-7-17</id>
		<title>NLP Status Report 2017-7-17</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-7-17"/>
				<updated>2017-08-21T00:50:33Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/7/17&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*&lt;br /&gt;
|| &lt;br /&gt;
*generate stream according to a couplet  &lt;br /&gt;
*try my best to complete the task of filling in the blanks of a couplet&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
*&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
*&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* found ways to tokenize the WMT2014 data &lt;br /&gt;
   rewrote prepare_data.py form moses-smt&lt;br /&gt;
   used the tokenizer of moses-smt&lt;br /&gt;
&lt;br /&gt;
*train two versions of the code on WMT2014 en-de and en-fr datasets&lt;br /&gt;
   tested these checkpoints of en-de dataset&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
* tested these checkpoints of en-fr dataset&lt;br /&gt;
* record the result and do analysis &lt;br /&gt;
* read papers about memory augmented NMT &lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-7</id>
		<title>NLP Status Report 2017-8-7</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-7"/>
				<updated>2017-08-21T00:48:52Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/7&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*generated stream according to a couplet&lt;br /&gt;
*almost completed the task of filling in the blanks of a couplet&lt;br /&gt;
|| &lt;br /&gt;
*continue to perfect the couplet model&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* train statistical machine translation model and test it &lt;br /&gt;
  toolkit: Moses&lt;br /&gt;
  data sets:WMT2014 en-de、en-fr data sets&lt;br /&gt;
* collate experimental results; compare our baseline model with Moses &lt;br /&gt;
  en-de dataset&lt;br /&gt;
  Moses:15.4&lt;br /&gt;
  Baseline:14.87&lt;br /&gt;
  &lt;br /&gt;
  en-fr datasets&lt;br /&gt;
  under training&lt;br /&gt;
||&lt;br /&gt;
* read memory-augment NMT code &lt;br /&gt;
* think about the next step work&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
*processed documents. Until now, Shiji has been split into 24,000 pairs of sentences.&lt;br /&gt;
*Zizhitongjian has been split into 16,000 pairs.&lt;br /&gt;
||&lt;br /&gt;
*adjust jieba source code, in order to make jieba more accurate for ancient language wordpiece&lt;br /&gt;
*read model source code&lt;br /&gt;
|-&lt;br /&gt;
||&lt;br /&gt;
||&lt;br /&gt;
*run Shiji-based model&lt;br /&gt;
*combine Shiji and Zizhitongjian,then run the model&lt;br /&gt;
在 秦国 做国 人 的 第二 ， 与 张仪 一起 进言 ， 于是 派 司马错 率兵 攻打 蜀地 ， 于是 攻克 了 ， 乘势 攻下 了 。&lt;br /&gt;
因 冯直 有 的 言辞 和 他 的 情况 ， 往往 得不到 安宁 罢了 。&lt;br /&gt;
　 　 到 了 道术 _UNK 的 方法 ， 弃 _UNK _UNK ， _UNK _UNK ， 赦免 天时 ， 使人 著 儒家 的 态度 。&lt;br /&gt;
　 　 教化 礼制 _UNK 法度 ， 一定 要 根据 这 方法 发展 治理 百姓 。&lt;br /&gt;
　 　 无 德行 的 人 ， 无不 分明 ， 所以 能够 忍受 八万余 人 。&lt;br /&gt;
　 　 _UNK _UNK 就 完备 了 ， 那么 国家 的 强盛 就 会 疲惫 ， _UNK 鬼神 就 会&lt;br /&gt;
　 　 当今 天子 到 各 封地 ， 建立 到 长安 ， 而 国内 没有 得到 右翼 ， 就 当上 了 出使 最下 等 的 事 。 我 死 了 ， 你 必然 当上 了 大官 ； 不 为 我 要死 呢 。&lt;br /&gt;
　 　 孔子 说 ： ‘ 我 想 把 她 作 俸禄 的 恶名 ， 不 在于 见到 他 的 行事 ， 就 感动 得 很 容易 考虑 。&lt;br /&gt;
　 　 他 的 部属 都 认为 好人 很 好 ， 认为 他 不 了解 他 的 大义 ， 被 处以 死刑 的 刑罚 ， 不敢 再 推辞 。&lt;br /&gt;
汉 王朝 兴起 ， 到达 天子 ， 平定 华夏 _UNK ， 消除 _UNK ， 消除 _UNK ， 整治 _UNK ， 统一 度量衡 ， 整治 满足 本人 的 需要 ， 准备 调节 _UNK ， 请 举行 _UNK 。&lt;br /&gt;
　 　 韩厥 慈爱 孝顺 的 功绩 ， _UNK _UNK 为 他 。&lt;br /&gt;
作 《 项羽 本纪 》 第二十七 。&lt;br /&gt;
　 　 《 春秋 》 以后 ， 诸侯 独断专行 ， 安抚 四方 国家 ； _UNK 到 秦国 ， 最终 并吞 夏朝 的 土地 ， 消灭 了 周室 ， 他 的 封号 。&lt;br /&gt;
　 　 作 《 魏公子 _UNK 列传 》 _UNK 。&lt;br /&gt;
太公 、 孙子 、 吴 、 王子 继位 则 有 明智 而 有 明智 之心 ， 违背 天道 ， 无不 分明 。&lt;br /&gt;
　 　 运筹帷幄之中 ， 安抚 耕种 ； 他 的 运行 则 驰骋 弋猎 的 声音 ， 招来 了 一套 _UNK 的 利益 ， 违背 风俗 ， 补救 积弊 ， 去掉 节俭 的 性能 。&lt;br /&gt;
　 　 成王 年龄 尚幼 ， 公室 非常 怀疑 他 ， 淮夷 背叛 他 ， 于是 召公 振兴 了 成王 ， 占有 了 天下 的 政权 。&lt;br /&gt;
}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-14</id>
		<title>NLP Status Report 2017-8-14</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-14"/>
				<updated>2017-08-21T00:48:04Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/8/14&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*polished the couplet model &lt;br /&gt;
|| &lt;br /&gt;
Code refactoring for poem system&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* organized the results of the experiment&lt;br /&gt;
* learned how to use THUMT and how did it work&lt;br /&gt;
||&lt;br /&gt;
* train translation models by using THUMT&lt;br /&gt;
* test the bleu of these models&lt;br /&gt;
* compared with our system&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-14</id>
		<title>NLP Status Report 2017-8-14</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-14"/>
				<updated>2017-08-21T00:36:22Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/7/3&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*polished the couplet model &lt;br /&gt;
|| &lt;br /&gt;
Code refactoring for poem system&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-14</id>
		<title>NLP Status Report 2017-8-14</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-14"/>
				<updated>2017-08-21T00:35:58Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/7/3&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*polished the couplet model &lt;br /&gt;
||&lt;br /&gt;
Code refactoring for poem system&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
||&lt;br /&gt;
|-&lt;br /&gt;
&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/7/3&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*polished the couplet model &lt;br /&gt;
|| &lt;br /&gt;
Code refactoring for poem system&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-7-10</id>
		<title>NLP Status Report 2017-7-10</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-7-10"/>
				<updated>2017-08-21T00:31:04Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/7/10&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*reproduced the couplet model using moses&lt;br /&gt;
|| &lt;br /&gt;
*continue to modify the couplet&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
* Tried a seq2seq with style code model but it didn't work.&lt;br /&gt;
* Coded attention-based seq2seq NMT in shallow fusion with a language model.&lt;br /&gt;
||&lt;br /&gt;
* Complete coding and have a try. &lt;br /&gt;
* Find more monolingual corpus and upgrade the model.&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* run two versions of the code on small data sets (Chinese-English)  and tested these checkpoints&lt;br /&gt;
     found version 1.0 saves about 0.03s per step, &lt;br /&gt;
           and these two versions have similar complexity and bleu values &lt;br /&gt;
     found that the bleu is still good when the model is over fitting .&lt;br /&gt;
           (reason: the test set and the train set of small data set are similar in content and style) &lt;br /&gt;
* run two versions of the code on big data sets (Chinese-English) . &lt;br /&gt;
     OOM（Out Of Memory） error occurred when version 0.1 was trained using large data set，but version 1.0 worked &lt;br /&gt;
          reason: improper distribution of resources by the tensorflow0.1 frame leads to exhaustion of memory resources &lt;br /&gt;
     I had tried 4 times （just enter the same command）, and version 0.1 worked &lt;br /&gt;
          found version 1.0 saves about 0.06s per step, and these two versions have similar complexity and bleu values &lt;br /&gt;
* downloaded the wmt2014 data set ,used the English-French data set to run the code and &lt;br /&gt;
    found the translation is not good (reason:improper word segmentation)&lt;br /&gt;
||&lt;br /&gt;
* do word segmentation on wmt2014  data set &lt;br /&gt;
* run two versions of the code on wmt2014  data set &lt;br /&gt;
* record the result and do analysis &lt;br /&gt;
* learn and train moses(use big data sets (Chinese-English))&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-7</id>
		<title>NLP Status Report 2017-8-7</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-8-7"/>
				<updated>2017-08-07T05:08:16Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/7/3&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*generated stream according to a couplet&lt;br /&gt;
*almost completed the task of filling in the blanks of a couplet&lt;br /&gt;
|| &lt;br /&gt;
*continue to perfect the couplet model&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* train statistical machine translation model and test it &lt;br /&gt;
  toolkit: Moses&lt;br /&gt;
  data sets:WMT2014 en-de、en-fr data sets&lt;br /&gt;
* collate experimental results; compare our baseline model with Moses &lt;br /&gt;
  en-de dataset&lt;br /&gt;
  Moses:15.4&lt;br /&gt;
  Baseline:14.87&lt;br /&gt;
  &lt;br /&gt;
  en-fr datasets&lt;br /&gt;
  under training&lt;br /&gt;
||&lt;br /&gt;
* read memory-augment NMT code &lt;br /&gt;
* think about the next step work&lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
*processed documents. Until now, Shiji has been split into 24,000 pairs of sentences.&lt;br /&gt;
*Zizhitongjian has been split into 16,000 pairs.&lt;br /&gt;
||&lt;br /&gt;
*adjust jieba source code, in order to make jieba more accurate for ancient language wordpiece&lt;br /&gt;
*read model source code&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	<entry>
		<id>http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-7-31</id>
		<title>NLP Status Report 2017-7-31</title>
		<link rel="alternate" type="text/html" href="http://index.cslt.org/mediawiki/index.php/NLP_Status_Report_2017-7-31"/>
				<updated>2017-07-31T04:57:55Z</updated>
		
		<summary type="html">&lt;p&gt;Renshipan：&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Date !! People !! Last Week !! This Week&lt;br /&gt;
|-&lt;br /&gt;
| rowspan=&amp;quot;6&amp;quot;|2017/7/3&lt;br /&gt;
|Jiyuan Zhang ||&lt;br /&gt;
*made the poster for ACL&lt;br /&gt;
*attempted to fix repeated word, but failed&lt;br /&gt;
*done some work of n-gram model of the couplet&lt;br /&gt;
|| &lt;br /&gt;
*generate stream according to a couplet&lt;br /&gt;
*complete the task of filling in the blanks of a couplet&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Aodong LI ||&lt;br /&gt;
* Got 55,000+ English poems and 260,000+ lines after preprocessing&lt;br /&gt;
* Added phase separators as the style indicator, and every line has at least one separator&lt;br /&gt;
* Training loss didn't decrease very much, only from 440 to 50&lt;br /&gt;
* The translation quality deteriorated when added language model&lt;br /&gt;
||&lt;br /&gt;
* Try to use a larger language model to decrease the training loss&lt;br /&gt;
* Try to use character-based MT in English-Chinese translation&lt;br /&gt;
|-&lt;br /&gt;
|Shiyue Zhang || &lt;br /&gt;
&lt;br /&gt;
||&lt;br /&gt;
&lt;br /&gt;
|-&lt;br /&gt;
|Shipan Ren ||&lt;br /&gt;
* looked for the performance(the bleu value) of other models &lt;br /&gt;
  on the WMT2014 dataset from the published papers,but not found.&lt;br /&gt;
* installed and built Moses on the server   &lt;br /&gt;
||&lt;br /&gt;
* train statistical machine translation model and test it &lt;br /&gt;
  toolkit: Moses&lt;br /&gt;
  data sets:WMT2014 en-de、en-fr data sets&lt;br /&gt;
* collate experimental results; compare our baseline model with Moses &lt;br /&gt;
|-&lt;br /&gt;
    &lt;br /&gt;
|Jiayu Guo||&lt;br /&gt;
*processed documents.&lt;br /&gt;
*Shiji has been split into 25,000 pairs of sentences.&lt;br /&gt;
*Zizhitongjian has been split into 20,000 pairs.&lt;br /&gt;
||&lt;br /&gt;
*adjust jieba source code&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Renshipan</name></author>	</entry>

	</feed>